1
/*****************************************************************************
3
Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
4
Copyright (c) 2008, Google Inc.
6
Portions of this file contain modifications contributed and copyrighted by
7
Google, Inc. Those modifications are gratefully acknowledged and are described
8
briefly in the InnoDB documentation. The contributions by Google are
9
incorporated with their permission, and subject to the conditions contained in
10
the file COPYING.Google.
12
This program is free software; you can redistribute it and/or modify it under
13
the terms of the GNU General Public License as published by the Free Software
14
Foundation; version 2 of the License.
16
This program is distributed in the hope that it will be useful, but WITHOUT
17
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20
You should have received a copy of the GNU General Public License along with
21
this program; if not, write to the Free Software Foundation, Inc.,
22
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
*****************************************************************************/
26
/**************************************************//**
27
@file sync/sync0sync.cc
28
Mutex, the basic synchronization primitive
30
Created 9/5/1995 Heikki Tuuri
31
*******************************************************/
33
#include "sync0sync.h"
35
#include "sync0sync.ic"
36
#include "sync0arr.ic"
42
#include "buf0types.h"
43
#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
44
#ifdef UNIV_SYNC_DEBUG
45
# include "srv0start.h" /* srv_is_being_started */
46
#endif /* UNIV_SYNC_DEBUG */
47
#include "ha_prototypes.h"
50
REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX
51
============================================
53
Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc
54
takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995
55
Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to
56
implement our own efficient spin lock mutex. Future operating systems may
57
provide efficient spin locks, but we cannot count on that.
59
Another reason for implementing a spin lock is that on multiprocessor systems
60
it can be more efficient for a processor to run a loop waiting for the
61
semaphore to be released than to switch to a different thread. A thread switch
62
takes 25 us on both platforms mentioned above. See Gray and Reuter's book
63
Transaction processing for background.
65
How long should the spin loop last before suspending the thread? On a
66
uniprocessor, spinning does not help at all, because if the thread owning the
67
mutex is not executing, it cannot be released. Spinning actually wastes
70
On a multiprocessor, we do not know if the thread owning the mutex is
71
executing or not. Thus it would make sense to spin as long as the operation
72
guarded by the mutex would typically last assuming that the thread is
73
executing. If the mutex is not released by that time, we may assume that the
74
thread owning the mutex is not executing and suspend the waiting thread.
76
A typical operation (where no i/o involved) guarded by a mutex or a read-write
77
lock may last 1 - 20 us on the current Pentium platform. The longest
78
operations are the binary searches on an index node.
80
We conclude that the best choice is to set the spin time at 20 us. Then the
81
system should work well on a multiprocessor. On a uniprocessor we have to
82
make sure that thread switches due to mutex collisions are not frequent,
83
i.e., they do not happen every 100 us or so, because that wastes too much
84
resources. If the thread switches are not frequent, the 20 us wasted in spin
87
Empirical studies on the effect of spin time should be done for different
91
IMPLEMENTATION OF THE MUTEX
92
===========================
94
For background, see Curt Schimmel's book on Unix implementation on modern
95
architectures. The key points in the implementation are atomicity and
96
serialization of memory accesses. The test-and-set instruction (XCHG in
97
Pentium) must be atomic. As new processors may have weak memory models, also
98
serialization of memory references may be necessary. The successor of Pentium,
99
P6, has at least one mode where the memory model is weak. As far as we know,
100
in Pentium all memory accesses are serialized in the program order and we do
101
not have to worry about the memory model. On other processors there are
102
special machine instructions called a fence, memory barrier, or storage
103
barrier (STBAR in Sparc), which can be used to serialize the memory accesses
104
to happen in program order relative to the fence instruction.
106
Leslie Lamport has devised a "bakery algorithm" to implement a mutex without
107
the atomic test-and-set, but his algorithm should be modified for weak memory
108
models. We do not use Lamport's algorithm, because we guess it is slower than
109
the atomic test-and-set.
111
Our mutex implementation works as follows: After that we perform the atomic
112
test-and-set instruction on the memory word. If the test returns zero, we
113
know we got the lock first. If the test returns not zero, some other thread
114
was quicker and got the lock: then we spin in a loop reading the memory word,
115
waiting for it to become zero. It is wise to just read the word in the loop, not
116
perform numerous test-and-set instructions, because they generate memory
117
traffic between the cache and the main memory. The read loop can just access
118
the cache, saving bus bandwidth.
120
If we cannot acquire the mutex lock in the specified time, we reserve a cell
121
in the wait array, set the waiters byte in the mutex to 1. To avoid a race
122
condition, after setting the waiters byte and before suspending the waiting
123
thread, we still have to check that the mutex is reserved, because it may
124
have happened that the thread which was holding the mutex has just released
125
it and did not see the waiters byte set to 1, a case which would lead the
126
other thread to an infinite wait.
128
LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some
130
thread will eventually call os_event_set() on that particular event.
131
Thus no infinite wait is possible in this case.
133
Proof: After making the reservation the thread sets the waiters field in the
134
mutex to 1. Then it checks that the mutex is still reserved by some thread,
135
or it reserves the mutex for itself. In any case, some thread (which may be
136
also some earlier thread, not necessarily the one currently holding the mutex)
137
will set the waiters field to 0 in mutex_exit, and then call
138
os_event_set() with the mutex as an argument.
141
LEMMA 2: If an os_event_set() call is made after some thread has called
143
the os_event_reset() and before it starts wait on that event, the call
144
will not be lost to the second thread. This is true even if there is an
145
intervening call to os_event_reset() by another thread.
146
Thus no infinite wait is possible in this case.
148
Proof (non-windows platforms): os_event_reset() returns a monotonically
149
increasing value of signal_count. This value is increased at every
150
call of os_event_set(). If thread A has called os_event_reset() followed
151
by thread B calling os_event_set() and then some other thread C calling
152
os_event_reset(), the is_set flag of the event will be set to FALSE;
153
but now if thread A calls os_event_wait_low() with the signal_count
154
value returned from the earlier call of os_event_reset(), it will
155
return immediately without waiting.
158
Proof (windows): If there is a writer thread which is forced to wait for
159
the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX
160
The design of rw_lock ensures that there is one and only one thread
161
that is able to change the state to RW_LOCK_WAIT_EX and this thread is
162
guaranteed to acquire the lock after it is released by the current
163
holders and before any other waiter gets the lock.
164
On windows this thread waits on a separate event i.e.: wait_ex_event.
165
Since only one thread can wait on this event there is no chance
166
of this event getting reset before the writer starts wait on it.
167
Therefore, this thread is guaranteed to catch the os_event_set()
168
signalled unconditionally at the release of the lock.
171
/* Number of spin waits on mutexes: for performance monitoring */
173
/** The number of iterations in the mutex_spin_wait() spin loop.
174
Intended for performance monitoring. */
175
static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_round_count;
176
/** The number of mutex_spin_wait() calls. Intended for
177
performance monitoring. */
178
static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_wait_count;
179
/** The number of OS waits in mutex_spin_wait(). Intended for
180
performance monitoring. */
181
static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_os_wait_count;
182
/** The number of mutex_exit() calls. Intended for performance
184
UNIV_INTERN ib_int64_t mutex_exit_count;
186
/** This variable is set to TRUE when sync_init is called */
187
UNIV_INTERN ibool sync_initialized = FALSE;
189
#ifdef UNIV_SYNC_DEBUG
190
/** An acquired mutex or rw-lock and its level in the latching order */
192
/** Mutexes or rw-locks held by a thread */
193
struct sync_thread_t;
195
/** The latch levels currently owned by threads are stored in this data
196
structure; the size of this array is OS_THREAD_MAX_N */
198
UNIV_INTERN sync_thread_t* sync_thread_level_arrays;
200
/** Mutex protecting sync_thread_level_arrays */
201
UNIV_INTERN ib_mutex_t sync_thread_mutex;
203
# ifdef UNIV_PFS_MUTEX
204
UNIV_INTERN mysql_pfs_key_t sync_thread_mutex_key;
205
# endif /* UNIV_PFS_MUTEX */
206
#endif /* UNIV_SYNC_DEBUG */
208
/** Global list of database mutexes (not OS mutexes) created. */
209
UNIV_INTERN ut_list_base_node_t mutex_list;
211
/** Mutex protecting the mutex_list variable */
212
UNIV_INTERN ib_mutex_t mutex_list_mutex;
214
#ifdef UNIV_PFS_MUTEX
215
UNIV_INTERN mysql_pfs_key_t mutex_list_mutex_key;
216
#endif /* UNIV_PFS_MUTEX */
218
#ifdef UNIV_SYNC_DEBUG
219
/** Latching order checks start when this is set TRUE */
220
UNIV_INTERN ibool sync_order_checks_on = FALSE;
222
/** Number of slots reserved for each OS thread in the sync level array */
223
static const ulint SYNC_THREAD_N_LEVELS = 10000;
225
/** Array for tracking sync levels per thread. */
227
ulint in_use; /*!< Number of active cells */
228
ulint n_elems; /*!< Number of elements in the array */
229
ulint max_elems; /*!< Maximum elements */
230
ulint next_free; /*!< ULINT_UNDEFINED or index of next
232
sync_level_t* elems; /*!< Array elements */
235
/** Mutexes or rw-locks held by a thread */
236
struct sync_thread_t{
237
os_thread_id_t id; /*!< OS thread id */
238
sync_arr_t* levels; /*!< level array for this thread; if
239
this is NULL this slot is unused */
242
/** An acquired mutex or rw-lock and its level in the latching order */
244
void* latch; /*!< pointer to a mutex or an
245
rw-lock; NULL means that
247
ulint level; /*!< level of the latch in the
248
latching order. This field is
249
overloaded to serve as a node in a
250
linked list of free nodes too. When
251
latch == NULL then this will contain
252
the ordinal value of the next free
255
#endif /* UNIV_SYNC_DEBUG */
257
/******************************************************************//**
258
Creates, or rather, initializes a mutex object in a specified memory
259
location (which must be appropriately aligned). The mutex is initialized
260
in the reset state. Explicit freeing of the mutex with mutex_free is
261
necessary only if the memory block containing it is freed. */
266
ib_mutex_t* mutex, /*!< in: pointer to memory */
268
const char* cmutex_name, /*!< in: mutex name */
269
# ifdef UNIV_SYNC_DEBUG
270
ulint level, /*!< in: level */
271
# endif /* UNIV_SYNC_DEBUG */
272
#endif /* UNIV_DEBUG */
273
const char* cfile_name, /*!< in: file name where created */
274
ulint cline) /*!< in: file line where created */
276
#if defined(HAVE_ATOMIC_BUILTINS)
277
mutex_reset_lock_word(mutex);
279
os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mutex->os_fast_mutex);
280
mutex->lock_word = 0;
282
mutex->event = os_event_create();
283
mutex_set_waiters(mutex, 0);
285
mutex->magic_n = MUTEX_MAGIC_N;
286
#endif /* UNIV_DEBUG */
287
#ifdef UNIV_SYNC_DEBUG
289
mutex->file_name = "not yet reserved";
290
mutex->level = level;
291
#endif /* UNIV_SYNC_DEBUG */
292
mutex->cfile_name = cfile_name;
293
mutex->cline = cline;
294
mutex->count_os_wait = 0;
296
/* Check that lock_word is aligned; this is important on Intel */
297
ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
299
/* NOTE! The very first mutexes are not put to the mutex list */
301
if ((mutex == &mutex_list_mutex)
302
#ifdef UNIV_SYNC_DEBUG
303
|| (mutex == &sync_thread_mutex)
304
#endif /* UNIV_SYNC_DEBUG */
310
mutex_enter(&mutex_list_mutex);
312
ut_ad(UT_LIST_GET_LEN(mutex_list) == 0
313
|| UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N);
315
UT_LIST_ADD_FIRST(list, mutex_list, mutex);
317
mutex_exit(&mutex_list_mutex);
320
/******************************************************************//**
321
NOTE! Use the corresponding macro mutex_free(), not directly this function!
322
Calling this function is obligatory only if the memory buffer containing
323
the mutex is freed. Removes a mutex object from the mutex list. The mutex
324
is checked to be in the reset state. */
329
ib_mutex_t* mutex) /*!< in: mutex */
331
ut_ad(mutex_validate(mutex));
332
ut_a(mutex_get_lock_word(mutex) == 0);
333
ut_a(mutex_get_waiters(mutex) == 0);
335
#ifdef UNIV_MEM_DEBUG
336
if (mutex == &mem_hash_mutex) {
337
ut_ad(UT_LIST_GET_LEN(mutex_list) == 1);
338
ut_ad(UT_LIST_GET_FIRST(mutex_list) == &mem_hash_mutex);
339
UT_LIST_REMOVE(list, mutex_list, mutex);
342
#endif /* UNIV_MEM_DEBUG */
344
if (mutex != &mutex_list_mutex
345
#ifdef UNIV_SYNC_DEBUG
346
&& mutex != &sync_thread_mutex
347
#endif /* UNIV_SYNC_DEBUG */
350
mutex_enter(&mutex_list_mutex);
352
ut_ad(!UT_LIST_GET_PREV(list, mutex)
353
|| UT_LIST_GET_PREV(list, mutex)->magic_n
355
ut_ad(!UT_LIST_GET_NEXT(list, mutex)
356
|| UT_LIST_GET_NEXT(list, mutex)->magic_n
359
UT_LIST_REMOVE(list, mutex_list, mutex);
361
mutex_exit(&mutex_list_mutex);
364
os_event_free(mutex->event);
365
#ifdef UNIV_MEM_DEBUG
367
#endif /* UNIV_MEM_DEBUG */
368
#if !defined(HAVE_ATOMIC_BUILTINS)
369
os_fast_mutex_free(&(mutex->os_fast_mutex));
371
/* If we free the mutex protecting the mutex list (freeing is
372
not necessary), we have to reset the magic number AFTER removing
376
#endif /* UNIV_DEBUG */
380
/********************************************************************//**
381
NOTE! Use the corresponding macro in the header file, not this function
382
directly. Tries to lock the mutex for the current thread. If the lock is not
383
acquired immediately, returns with return value 1.
384
@return 0 if succeed, 1 if not */
387
mutex_enter_nowait_func(
388
/*====================*/
389
ib_mutex_t* mutex, /*!< in: pointer to mutex */
390
const char* file_name __attribute__((unused)),
391
/*!< in: file name where mutex
393
ulint line __attribute__((unused)))
394
/*!< in: line where requested */
396
ut_ad(mutex_validate(mutex));
398
if (!ib_mutex_test_and_set(mutex)) {
400
ut_d(mutex->thread_id = os_thread_get_curr_id());
401
#ifdef UNIV_SYNC_DEBUG
402
mutex_set_debug_info(mutex, file_name, line);
405
return(0); /* Succeeded! */
412
/******************************************************************//**
413
Checks that the mutex has been initialized.
419
const ib_mutex_t* mutex) /*!< in: mutex */
422
ut_a(mutex->magic_n == MUTEX_MAGIC_N);
427
/******************************************************************//**
428
Checks that the current thread owns the mutex. Works only in the debug
430
@return TRUE if owns */
435
const ib_mutex_t* mutex) /*!< in: mutex */
437
ut_ad(mutex_validate(mutex));
439
return(mutex_get_lock_word(mutex) == 1
440
&& os_thread_eq(mutex->thread_id, os_thread_get_curr_id()));
442
#endif /* UNIV_DEBUG */
444
/******************************************************************//**
445
Sets the waiters field in a mutex. */
450
ib_mutex_t* mutex, /*!< in: mutex */
451
ulint n) /*!< in: value to set */
453
volatile ulint* ptr; /* declared volatile to ensure that
454
the value is stored to memory */
457
ptr = &(mutex->waiters);
459
*ptr = n; /* Here we assume that the write of a single
460
word in memory is atomic */
463
/******************************************************************//**
464
Reserves a mutex for the current thread. If the mutex is reserved, the
465
function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
466
for the mutex before suspending the thread. */
471
ib_mutex_t* mutex, /*!< in: pointer to mutex */
472
const char* file_name, /*!< in: file name where mutex
474
ulint line) /*!< in: line where requested */
476
ulint i; /* spin round count */
477
ulint index; /* index of the reserved wait cell */
478
sync_array_t* sync_arr;
479
size_t counter_index;
481
counter_index = (size_t) os_thread_get_curr_id();
485
/* This update is not thread safe, but we don't mind if the count
486
isn't exact. Moved out of ifdef that follows because we are willing
487
to sacrifice the cost of counting this as the data is valuable.
488
Count the number of calls to mutex_spin_wait. */
489
mutex_spin_wait_count.add(counter_index, 1);
495
/* Spin waiting for the lock word to become zero. Note that we do
496
not have to assume that the read access to the lock word is atomic,
497
as the actual locking is always committed with atomic test-and-set.
498
In reality, however, all processors probably have an atomic read of
503
while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
504
if (srv_spin_wait_delay) {
505
ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
511
if (i == SYNC_SPIN_ROUNDS) {
515
mutex_spin_round_count.add(counter_index, i);
517
if (ib_mutex_test_and_set(mutex) == 0) {
520
ut_d(mutex->thread_id = os_thread_get_curr_id());
521
#ifdef UNIV_SYNC_DEBUG
522
mutex_set_debug_info(mutex, file_name, line);
527
/* We may end up with a situation where lock_word is 0 but the OS
528
fast mutex is still reserved. On FreeBSD the OS does not seem to
529
schedule a thread which is constantly calling pthread_mutex_trylock
530
(in ib_mutex_test_and_set implementation). Then we could end up
531
spinning here indefinitely. The following 'i++' stops this infinite
536
if (i < SYNC_SPIN_ROUNDS) {
540
sync_arr = sync_array_get_and_reserve_cell(mutex, SYNC_MUTEX,
541
file_name, line, &index);
543
/* The memory order of the array reservation and the change in the
544
waiters field is important: when we suspend a thread, we first
545
reserve the cell and then set waiters field to 1. When threads are
546
released in mutex_exit, the waiters field is first set to zero and
547
then the event is set to the signaled state. */
549
mutex_set_waiters(mutex, 1);
551
/* Try to reserve still a few times */
552
for (i = 0; i < 4; i++) {
553
if (ib_mutex_test_and_set(mutex) == 0) {
554
/* Succeeded! Free the reserved wait cell */
556
sync_array_free_cell(sync_arr, index);
558
ut_d(mutex->thread_id = os_thread_get_curr_id());
559
#ifdef UNIV_SYNC_DEBUG
560
mutex_set_debug_info(mutex, file_name, line);
565
/* Note that in this case we leave the waiters field
566
set to 1. We cannot reset it to zero, as we do not
567
know if there are other waiters. */
571
/* Now we know that there has been some thread holding the mutex
572
after the change in the wait array and the waiters field was made.
573
Now there is no risk of infinite wait on the event. */
575
mutex_os_wait_count.add(counter_index, 1);
577
mutex->count_os_wait++;
579
sync_array_wait_event(sync_arr, index);
584
/******************************************************************//**
585
Releases the threads waiting in the primary wait array for this mutex. */
590
ib_mutex_t* mutex) /*!< in: mutex */
592
mutex_set_waiters(mutex, 0);
594
/* The memory order of resetting the waiters field and
595
signaling the object is important. See LEMMA 1 above. */
596
os_event_set(mutex->event);
597
sync_array_object_signalled();
600
#ifdef UNIV_SYNC_DEBUG
601
/******************************************************************//**
602
Sets the debug information for a reserved mutex. */
605
mutex_set_debug_info(
606
/*=================*/
607
ib_mutex_t* mutex, /*!< in: mutex */
608
const char* file_name, /*!< in: file where requested */
609
ulint line) /*!< in: line where requested */
614
sync_thread_add_level(mutex, mutex->level, FALSE);
616
mutex->file_name = file_name;
620
/******************************************************************//**
621
Gets the debug information for a reserved mutex. */
624
mutex_get_debug_info(
625
/*=================*/
626
ib_mutex_t* mutex, /*!< in: mutex */
627
const char** file_name, /*!< out: file where requested */
628
ulint* line, /*!< out: line where requested */
629
os_thread_id_t* thread_id) /*!< out: id of the thread which owns
634
*file_name = mutex->file_name;
636
*thread_id = mutex->thread_id;
639
/******************************************************************//**
640
Prints debug info of currently reserved mutexes. */
643
mutex_list_print_info(
644
/*==================*/
645
FILE* file) /*!< in: file where to print */
648
const char* file_name;
650
os_thread_id_t thread_id;
655
"----------\n", file);
657
mutex_enter(&mutex_list_mutex);
659
mutex = UT_LIST_GET_FIRST(mutex_list);
661
while (mutex != NULL) {
664
if (mutex_get_lock_word(mutex) != 0) {
665
mutex_get_debug_info(mutex, &file_name, &line,
668
"Locked mutex: addr %p thread %ld"
669
" file %s line %ld\n",
670
(void*) mutex, os_thread_pf(thread_id),
674
mutex = UT_LIST_GET_NEXT(list, mutex);
677
fprintf(file, "Total number of mutexes %ld\n", count);
679
mutex_exit(&mutex_list_mutex);
682
/******************************************************************//**
683
Counts currently reserved mutexes. Works only in the debug version.
684
@return number of reserved mutexes */
687
mutex_n_reserved(void)
688
/*==================*/
693
mutex_enter(&mutex_list_mutex);
695
for (mutex = UT_LIST_GET_FIRST(mutex_list);
697
mutex = UT_LIST_GET_NEXT(list, mutex)) {
699
if (mutex_get_lock_word(mutex) != 0) {
705
mutex_exit(&mutex_list_mutex);
709
/* Subtract one, because this function itself was holding
710
one mutex (mutex_list_mutex) */
715
/******************************************************************//**
716
Returns TRUE if no mutex or rw-lock is currently locked. Works only in
718
@return TRUE if no mutexes and rw-locks reserved */
724
return(mutex_n_reserved() + rw_lock_n_locked() == 0);
727
/******************************************************************//**
728
Looks for the thread slot for the calling thread.
729
@return pointer to thread slot, NULL if not found */
732
sync_thread_level_arrays_find_slot(void)
733
/*====================================*/
739
id = os_thread_get_curr_id();
741
for (i = 0; i < OS_THREAD_MAX_N; i++) {
744
slot = &sync_thread_level_arrays[i];
746
if (slot->levels && os_thread_eq(slot->id, id)) {
755
/******************************************************************//**
756
Looks for an unused thread slot.
757
@return pointer to thread slot */
760
sync_thread_level_arrays_find_free(void)
761
/*====================================*/
766
for (i = 0; i < OS_THREAD_MAX_N; i++) {
769
slot = &sync_thread_level_arrays[i];
771
if (slot->levels == NULL) {
780
/******************************************************************//**
786
const sync_level_t* slot) /*!< in: slot for which to
791
mutex = static_cast<ib_mutex_t*>(slot->latch);
793
if (mutex->magic_n == MUTEX_MAGIC_N) {
795
"Mutex created at %s %lu\n",
796
innobase_basename(mutex->cfile_name),
797
(ulong) mutex->cline);
799
if (mutex_get_lock_word(mutex) != 0) {
801
const char* file_name;
802
os_thread_id_t thread_id;
804
mutex_get_debug_info(
805
mutex, &file_name, &line, &thread_id);
808
"InnoDB: Locked mutex:"
809
" addr %p thread %ld file %s line %ld\n",
810
(void*) mutex, os_thread_pf(thread_id),
811
file_name, (ulong) line);
813
fputs("Not locked\n", stderr);
818
lock = static_cast<rw_lock_t*>(slot->latch);
824
/******************************************************************//**
825
Checks if all the level values stored in the level array are greater than
827
@return TRUE if all greater */
830
sync_thread_levels_g(
831
/*=================*/
832
sync_arr_t* arr, /*!< in: pointer to level array for an OS
834
ulint limit, /*!< in: level limit */
835
ulint warn) /*!< in: TRUE=display a diagnostic message */
839
for (i = 0; i < arr->n_elems; i++) {
840
const sync_level_t* slot;
842
slot = &arr->elems[i];
844
if (slot->latch != NULL && slot->level <= limit) {
847
"InnoDB: sync levels should be"
848
" > %lu but a level is %lu\n",
849
(ulong) limit, (ulong) slot->level);
851
sync_print_warning(slot);
861
/******************************************************************//**
862
Checks if the level value is stored in the level array.
863
@return slot if found or NULL */
866
sync_thread_levels_contain(
867
/*=======================*/
868
sync_arr_t* arr, /*!< in: pointer to level array for an OS
870
ulint level) /*!< in: level */
874
for (i = 0; i < arr->n_elems; i++) {
875
const sync_level_t* slot;
877
slot = &arr->elems[i];
879
if (slot->latch != NULL && slot->level == level) {
888
/******************************************************************//**
889
Checks if the level array for the current thread contains a
890
mutex or rw-latch at the specified level.
891
@return a matching latch, or NULL if not found */
894
sync_thread_levels_contains(
895
/*========================*/
896
ulint level) /*!< in: latching order level
901
sync_thread_t* thread_slot;
903
if (!sync_order_checks_on) {
908
mutex_enter(&sync_thread_mutex);
910
thread_slot = sync_thread_level_arrays_find_slot();
912
if (thread_slot == NULL) {
914
mutex_exit(&sync_thread_mutex);
919
arr = thread_slot->levels;
921
for (i = 0; i < arr->n_elems; i++) {
924
slot = &arr->elems[i];
926
if (slot->latch != NULL && slot->level == level) {
928
mutex_exit(&sync_thread_mutex);
933
mutex_exit(&sync_thread_mutex);
938
/******************************************************************//**
939
Checks that the level array for the current thread is empty.
940
@return a latch, or NULL if empty except the exceptions specified below */
943
sync_thread_levels_nonempty_gen(
944
/*============================*/
945
ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is
946
allowed to be owned by the thread */
950
sync_thread_t* thread_slot;
952
if (!sync_order_checks_on) {
957
mutex_enter(&sync_thread_mutex);
959
thread_slot = sync_thread_level_arrays_find_slot();
961
if (thread_slot == NULL) {
963
mutex_exit(&sync_thread_mutex);
968
arr = thread_slot->levels;
970
for (i = 0; i < arr->n_elems; ++i) {
971
const sync_level_t* slot;
973
slot = &arr->elems[i];
975
if (slot->latch != NULL
976
&& (!dict_mutex_allowed
977
|| (slot->level != SYNC_DICT
978
&& slot->level != SYNC_DICT_OPERATION
979
&& slot->level != SYNC_FTS_CACHE))) {
981
mutex_exit(&sync_thread_mutex);
988
mutex_exit(&sync_thread_mutex);
993
/******************************************************************//**
994
Checks if the level array for the current thread is empty,
995
except for the btr_search_latch.
996
@return a latch, or NULL if empty except the exceptions specified below */
999
sync_thread_levels_nonempty_trx(
1000
/*============================*/
1001
ibool has_search_latch)
1002
/*!< in: TRUE if and only if the thread
1003
is supposed to hold btr_search_latch */
1007
sync_thread_t* thread_slot;
1009
if (!sync_order_checks_on) {
1014
ut_a(!has_search_latch
1015
|| sync_thread_levels_contains(SYNC_SEARCH_SYS));
1017
mutex_enter(&sync_thread_mutex);
1019
thread_slot = sync_thread_level_arrays_find_slot();
1021
if (thread_slot == NULL) {
1023
mutex_exit(&sync_thread_mutex);
1028
arr = thread_slot->levels;
1030
for (i = 0; i < arr->n_elems; ++i) {
1031
const sync_level_t* slot;
1033
slot = &arr->elems[i];
1035
if (slot->latch != NULL
1036
&& (!has_search_latch
1037
|| slot->level != SYNC_SEARCH_SYS)) {
1039
mutex_exit(&sync_thread_mutex);
1042
return(slot->latch);
1046
mutex_exit(&sync_thread_mutex);
1051
/******************************************************************//**
1052
Adds a latch and its level in the thread level array. Allocates the memory
1053
for the array if called first time for this OS thread. Makes the checks
1054
against other latch levels stored in the array for this thread. */
1057
sync_thread_add_level(
1058
/*==================*/
1059
void* latch, /*!< in: pointer to a mutex or an rw-lock */
1060
ulint level, /*!< in: level in the latching order; if
1061
SYNC_LEVEL_VARYING, nothing is done */
1062
ibool relock) /*!< in: TRUE if re-entering an x-lock */
1067
sync_thread_t* thread_slot;
1069
if (!sync_order_checks_on) {
1074
if ((latch == (void*) &sync_thread_mutex)
1075
|| (latch == (void*) &mutex_list_mutex)
1076
|| (latch == (void*) &rw_lock_debug_mutex)
1077
|| (latch == (void*) &rw_lock_list_mutex)) {
1082
if (level == SYNC_LEVEL_VARYING) {
1087
mutex_enter(&sync_thread_mutex);
1089
thread_slot = sync_thread_level_arrays_find_slot();
1091
if (thread_slot == NULL) {
1095
+ (sizeof(*array->elems) * SYNC_THREAD_N_LEVELS);
1097
/* We have to allocate the level array for a new thread */
1098
array = static_cast<sync_arr_t*>(calloc(sz, sizeof(char)));
1099
ut_a(array != NULL);
1101
array->next_free = ULINT_UNDEFINED;
1102
array->max_elems = SYNC_THREAD_N_LEVELS;
1103
array->elems = (sync_level_t*) &array[1];
1105
thread_slot = sync_thread_level_arrays_find_free();
1107
thread_slot->levels = array;
1108
thread_slot->id = os_thread_get_curr_id();
1111
array = thread_slot->levels;
1117
/* NOTE that there is a problem with _NODE and _LEAF levels: if the
1118
B-tree height changes, then a leaf can change to an internal node
1119
or the other way around. We do not know at present if this can cause
1120
unnecessary assertion failures below. */
1123
case SYNC_NO_ORDER_CHECK:
1124
case SYNC_EXTERN_STORAGE:
1125
case SYNC_TREE_NODE_FROM_HASH:
1126
/* Do no order checking */
1128
case SYNC_TRX_SYS_HEADER:
1129
if (srv_is_being_started) {
1130
/* This is violated during trx_sys_create_rsegs()
1131
when creating additional rollback segments when
1132
upgrading in innobase_start_or_create_for_mysql(). */
1138
case SYNC_FTS_BG_THREADS:
1139
case SYNC_WORK_QUEUE:
1140
case SYNC_FTS_OPTIMIZE:
1141
case SYNC_FTS_CACHE:
1142
case SYNC_FTS_CACHE_INIT:
1144
case SYNC_LOG_FLUSH_ORDER:
1145
case SYNC_ANY_LATCH:
1146
case SYNC_FILE_FORMAT_TAG:
1147
case SYNC_DOUBLEWRITE:
1148
case SYNC_SEARCH_SYS:
1151
case SYNC_LOCK_WAIT_SYS:
1153
case SYNC_IBUF_BITMAP_MUTEX:
1156
case SYNC_PURGE_LATCH:
1157
case SYNC_PURGE_QUEUE:
1158
case SYNC_DICT_AUTOINC_MUTEX:
1159
case SYNC_DICT_OPERATION:
1160
case SYNC_DICT_HEADER:
1161
case SYNC_TRX_I_S_RWLOCK:
1162
case SYNC_TRX_I_S_LAST_READ:
1163
case SYNC_IBUF_MUTEX:
1164
case SYNC_INDEX_ONLINE_LOG:
1165
case SYNC_STATS_AUTO_RECALC:
1166
if (!sync_thread_levels_g(array, level, TRUE)) {
1168
"InnoDB: sync_thread_levels_g(array, %lu)"
1169
" does not hold!\n", level);
1174
/* Either the thread must own the lock_sys->mutex, or
1175
it is allowed to own only ONE trx->mutex. */
1176
if (!sync_thread_levels_g(array, level, FALSE)) {
1177
ut_a(sync_thread_levels_g(array, level - 1, TRUE));
1178
ut_a(sync_thread_levels_contain(array, SYNC_LOCK_SYS));
1181
case SYNC_BUF_FLUSH_LIST:
1183
/* We can have multiple mutexes of this type therefore we
1184
can only check whether the greater than condition holds. */
1185
if (!sync_thread_levels_g(array, level-1, TRUE)) {
1187
"InnoDB: sync_thread_levels_g(array, %lu)"
1188
" does not hold!\n", level-1);
1194
case SYNC_BUF_PAGE_HASH:
1195
/* Multiple page_hash locks are only allowed during
1196
buf_validate and that is where buf_pool mutex is already
1200
case SYNC_BUF_BLOCK:
1201
/* Either the thread must own the buffer pool mutex
1202
(buf_pool->mutex), or it is allowed to latch only ONE
1203
buffer block (block->mutex or buf_pool->zip_mutex). */
1204
if (!sync_thread_levels_g(array, level, FALSE)) {
1205
ut_a(sync_thread_levels_g(array, level - 1, TRUE));
1206
ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
1210
if (sync_thread_levels_contain(array, SYNC_LOCK_SYS)) {
1211
ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK - 1,
1214
ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK, TRUE));
1217
case SYNC_IBUF_BITMAP:
1218
/* Either the thread must own the master mutex to all
1219
the bitmap pages, or it is allowed to latch only ONE
1221
if (sync_thread_levels_contain(array,
1222
SYNC_IBUF_BITMAP_MUTEX)) {
1223
ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1,
1226
/* This is violated during trx_sys_create_rsegs()
1227
when creating additional rollback segments when
1228
upgrading in innobase_start_or_create_for_mysql(). */
1229
ut_a(srv_is_being_started
1230
|| sync_thread_levels_g(array, SYNC_IBUF_BITMAP,
1235
ut_a(sync_thread_levels_contain(array, SYNC_FSP));
1238
ut_a(sync_thread_levels_contain(array, SYNC_FSP)
1239
|| sync_thread_levels_g(array, SYNC_FSP, TRUE));
1241
case SYNC_TRX_UNDO_PAGE:
1242
/* Purge is allowed to read in as many UNDO pages as it likes,
1243
there was a bogus rule here earlier that forced the caller to
1244
acquire the purge_sys_t::mutex. The purge mutex did not really
1245
protect anything because it was only ever acquired by the
1246
single purge thread. The purge thread can read the UNDO pages
1247
without any covering mutex. */
1249
ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
1250
|| sync_thread_levels_contain(array, SYNC_RSEG)
1251
|| sync_thread_levels_g(array, level - 1, TRUE));
1253
case SYNC_RSEG_HEADER:
1254
ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
1256
case SYNC_RSEG_HEADER_NEW:
1257
ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE));
1259
case SYNC_TREE_NODE:
1260
ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
1261
|| sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
1262
|| sync_thread_levels_g(array, SYNC_TREE_NODE - 1, TRUE));
1264
case SYNC_TREE_NODE_NEW:
1265
ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE));
1267
case SYNC_INDEX_TREE:
1268
ut_a(sync_thread_levels_g(array, SYNC_TREE_NODE - 1, TRUE));
1270
case SYNC_IBUF_TREE_NODE:
1271
ut_a(sync_thread_levels_contain(array, SYNC_IBUF_INDEX_TREE)
1272
|| sync_thread_levels_g(array, SYNC_IBUF_TREE_NODE - 1,
1275
case SYNC_IBUF_TREE_NODE_NEW:
1276
/* ibuf_add_free_page() allocates new pages for the
1277
change buffer while only holding the tablespace
1278
x-latch. These pre-allocated new pages may only be
1279
taken in use while holding ibuf_mutex, in
1280
btr_page_alloc_for_ibuf(). */
1281
ut_a(sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
1282
|| sync_thread_levels_contain(array, SYNC_FSP));
1284
case SYNC_IBUF_INDEX_TREE:
1285
if (sync_thread_levels_contain(array, SYNC_FSP)) {
1286
ut_a(sync_thread_levels_g(array, level - 1, TRUE));
1288
ut_a(sync_thread_levels_g(
1289
array, SYNC_IBUF_TREE_NODE - 1, TRUE));
1292
case SYNC_IBUF_PESS_INSERT_MUTEX:
1293
ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
1294
ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
1296
case SYNC_IBUF_HEADER:
1297
ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
1298
ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
1299
ut_a(!sync_thread_levels_contain(array,
1300
SYNC_IBUF_PESS_INSERT_MUTEX));
1304
ut_a(buf_debug_prints
1305
|| sync_thread_levels_g(array, SYNC_DICT, TRUE));
1306
#else /* UNIV_DEBUG */
1307
ut_a(sync_thread_levels_g(array, SYNC_DICT, TRUE));
1308
#endif /* UNIV_DEBUG */
1315
if (array->next_free == ULINT_UNDEFINED) {
1316
ut_a(array->n_elems < array->max_elems);
1318
i = array->n_elems++;
1320
i = array->next_free;
1321
array->next_free = array->elems[i].level;
1324
ut_a(i < array->n_elems);
1325
ut_a(i != ULINT_UNDEFINED);
1329
slot = &array->elems[i];
1331
ut_a(slot->latch == NULL);
1333
slot->latch = latch;
1334
slot->level = level;
1336
mutex_exit(&sync_thread_mutex);
1339
/******************************************************************//**
1340
Removes a latch from the thread level array if it is found there.
1341
@return TRUE if found in the array; it is no error if the latch is
1342
not found, as we presently are not able to determine the level for
1343
every latch reservation the program does */
1346
sync_thread_reset_level(
1347
/*====================*/
1348
void* latch) /*!< in: pointer to a mutex or an rw-lock */
1351
sync_thread_t* thread_slot;
1354
if (!sync_order_checks_on) {
1359
if ((latch == (void*) &sync_thread_mutex)
1360
|| (latch == (void*) &mutex_list_mutex)
1361
|| (latch == (void*) &rw_lock_debug_mutex)
1362
|| (latch == (void*) &rw_lock_list_mutex)) {
1367
mutex_enter(&sync_thread_mutex);
1369
thread_slot = sync_thread_level_arrays_find_slot();
1371
if (thread_slot == NULL) {
1375
mutex_exit(&sync_thread_mutex);
1379
array = thread_slot->levels;
1381
for (i = 0; i < array->n_elems; i++) {
1384
slot = &array->elems[i];
1386
if (slot->latch != latch) {
1392
/* Update the free slot list. See comment in sync_level_t
1393
for the level field. */
1394
slot->level = array->next_free;
1395
array->next_free = i;
1397
ut_a(array->in_use >= 1);
1400
/* If all cells are idle then reset the free
1401
list. The assumption is that this will save
1402
time when we need to scan up to n_elems. */
1404
if (array->in_use == 0) {
1406
array->next_free = ULINT_UNDEFINED;
1409
mutex_exit(&sync_thread_mutex);
1414
if (((ib_mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
1417
rw_lock = (rw_lock_t*) latch;
1419
if (rw_lock->level == SYNC_LEVEL_VARYING) {
1420
mutex_exit(&sync_thread_mutex);
1428
mutex_exit(&sync_thread_mutex);
1432
#endif /* UNIV_SYNC_DEBUG */
1434
/******************************************************************//**
1435
Initializes the synchronization data structures. */
1441
ut_a(sync_initialized == FALSE);
1443
sync_initialized = TRUE;
1445
sync_array_init(OS_THREAD_MAX_N);
1447
#ifdef UNIV_SYNC_DEBUG
1448
/* Create the thread latch level array where the latch levels
1449
are stored for each OS thread */
1451
sync_thread_level_arrays = static_cast<sync_thread_t*>(
1452
calloc(sizeof(sync_thread_t), OS_THREAD_MAX_N));
1454
ut_a(sync_thread_level_arrays != NULL);
1456
#endif /* UNIV_SYNC_DEBUG */
1457
/* Init the mutex list and create the mutex to protect it. */
1459
UT_LIST_INIT(mutex_list);
1460
mutex_create(mutex_list_mutex_key, &mutex_list_mutex,
1461
SYNC_NO_ORDER_CHECK);
1462
#ifdef UNIV_SYNC_DEBUG
1463
mutex_create(sync_thread_mutex_key, &sync_thread_mutex,
1464
SYNC_NO_ORDER_CHECK);
1465
#endif /* UNIV_SYNC_DEBUG */
1467
/* Init the rw-lock list and create the mutex to protect it. */
1469
UT_LIST_INIT(rw_lock_list);
1470
mutex_create(rw_lock_list_mutex_key, &rw_lock_list_mutex,
1471
SYNC_NO_ORDER_CHECK);
1473
#ifdef UNIV_SYNC_DEBUG
1474
mutex_create(rw_lock_debug_mutex_key, &rw_lock_debug_mutex,
1475
SYNC_NO_ORDER_CHECK);
1477
rw_lock_debug_event = os_event_create();
1478
rw_lock_debug_waiters = FALSE;
1479
#endif /* UNIV_SYNC_DEBUG */
1482
#ifdef UNIV_SYNC_DEBUG
1483
/******************************************************************//**
1484
Frees all debug memory. */
1487
sync_thread_level_arrays_free(void)
1488
/*===============================*/
1493
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1494
sync_thread_t* slot;
1496
slot = &sync_thread_level_arrays[i];
1498
/* If this slot was allocated then free the slot memory too. */
1499
if (slot->levels != NULL) {
1501
slot->levels = NULL;
1505
free(sync_thread_level_arrays);
1506
sync_thread_level_arrays = NULL;
1508
#endif /* UNIV_SYNC_DEBUG */
1510
/******************************************************************//**
1511
Frees the resources in InnoDB's own synchronization data structures. Use
1512
os_sync_free() after calling this. */
1522
for (mutex = UT_LIST_GET_FIRST(mutex_list);
1526
#ifdef UNIV_MEM_DEBUG
1527
if (mutex == &mem_hash_mutex) {
1528
mutex = UT_LIST_GET_NEXT(list, mutex);
1531
#endif /* UNIV_MEM_DEBUG */
1535
mutex = UT_LIST_GET_FIRST(mutex_list);
1538
mutex_free(&mutex_list_mutex);
1539
#ifdef UNIV_SYNC_DEBUG
1540
mutex_free(&sync_thread_mutex);
1542
/* Switch latching order checks on in sync0sync.cc */
1543
sync_order_checks_on = FALSE;
1545
sync_thread_level_arrays_free();
1546
#endif /* UNIV_SYNC_DEBUG */
1548
sync_initialized = FALSE;
1551
/*******************************************************************//**
1552
Prints wait info of the sync system. */
1555
sync_print_wait_info(
1556
/*=================*/
1557
FILE* file) /*!< in: file where to print */
1560
"Mutex spin waits "UINT64PF", rounds "UINT64PF", "
1561
"OS waits "UINT64PF"\n"
1562
"RW-shared spins "UINT64PF", rounds "UINT64PF", "
1563
"OS waits "UINT64PF"\n"
1564
"RW-excl spins "UINT64PF", rounds "UINT64PF", "
1565
"OS waits "UINT64PF"\n",
1566
(ib_uint64_t) mutex_spin_wait_count,
1567
(ib_uint64_t) mutex_spin_round_count,
1568
(ib_uint64_t) mutex_os_wait_count,
1569
(ib_uint64_t) rw_lock_stats.rw_s_spin_wait_count,
1570
(ib_uint64_t) rw_lock_stats.rw_s_spin_round_count,
1571
(ib_uint64_t) rw_lock_stats.rw_s_os_wait_count,
1572
(ib_uint64_t) rw_lock_stats.rw_x_spin_wait_count,
1573
(ib_uint64_t) rw_lock_stats.rw_x_spin_round_count,
1574
(ib_uint64_t) rw_lock_stats.rw_x_os_wait_count);
1577
"Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
1579
(double) mutex_spin_round_count /
1580
(mutex_spin_wait_count ? mutex_spin_wait_count : 1),
1581
(double) rw_lock_stats.rw_s_spin_round_count /
1582
(rw_lock_stats.rw_s_spin_wait_count
1583
? rw_lock_stats.rw_s_spin_wait_count : 1),
1584
(double) rw_lock_stats.rw_x_spin_round_count /
1585
(rw_lock_stats.rw_x_spin_wait_count
1586
? rw_lock_stats.rw_x_spin_wait_count : 1));
1589
/*******************************************************************//**
1590
Prints info of the sync system. */
1595
FILE* file) /*!< in: file where to print */
1597
#ifdef UNIV_SYNC_DEBUG
1598
mutex_list_print_info(file);
1600
rw_lock_list_print_info(file);
1601
#endif /* UNIV_SYNC_DEBUG */
1603
sync_array_print(file);
1605
sync_print_wait_info(file);