1
/*****************************************************************************
3
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
4
Copyright (c) 2008, Google Inc.
6
Portions of this file contain modifications contributed and copyrighted by
7
Google, Inc. Those modifications are gratefully acknowledged and are described
8
briefly in the InnoDB documentation. The contributions by Google are
9
incorporated with their permission, and subject to the conditions contained in
10
the file COPYING.Google.
12
This program is free software; you can redistribute it and/or modify it under
13
the terms of the GNU General Public License as published by the Free Software
14
Foundation; version 2 of the License.
16
This program is distributed in the hope that it will be useful, but WITHOUT
17
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20
You should have received a copy of the GNU General Public License along with
21
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
22
Place, Suite 330, Boston, MA 02111-1307 USA
24
*****************************************************************************/
26
/**************************************************//**
27
@file sync/sync0sync.c
28
Mutex, the basic synchronization primitive
30
Created 9/5/1995 Heikki Tuuri
31
*******************************************************/
33
#include "sync0sync.h"
35
#include "sync0sync.ic"
41
#include "buf0types.h"
42
#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
45
REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX
46
============================================
48
Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc
49
takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995
50
Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to
51
implement our own efficient spin lock mutex. Future operating systems may
52
provide efficient spin locks, but we cannot count on that.
54
Another reason for implementing a spin lock is that on multiprocessor systems
55
it can be more efficient for a processor to run a loop waiting for the
56
semaphore to be released than to switch to a different thread. A thread switch
57
takes 25 us on both platforms mentioned above. See Gray and Reuter's book
58
Transaction processing for background.
60
How long should the spin loop last before suspending the thread? On a
61
uniprocessor, spinning does not help at all, because if the thread owning the
62
mutex is not executing, it cannot be released. Spinning actually wastes
65
On a multiprocessor, we do not know if the thread owning the mutex is
66
executing or not. Thus it would make sense to spin as long as the operation
67
guarded by the mutex would typically last assuming that the thread is
68
executing. If the mutex is not released by that time, we may assume that the
69
thread owning the mutex is not executing and suspend the waiting thread.
71
A typical operation (where no i/o involved) guarded by a mutex or a read-write
72
lock may last 1 - 20 us on the current Pentium platform. The longest
73
operations are the binary searches on an index node.
75
We conclude that the best choice is to set the spin time at 20 us. Then the
76
system should work well on a multiprocessor. On a uniprocessor we have to
77
make sure that thread switches due to mutex collisions are not frequent,
78
i.e., they do not happen every 100 us or so, because that wastes too much
79
resources. If the thread switches are not frequent, the 20 us wasted in spin
82
Empirical studies on the effect of spin time should be done for different
86
IMPLEMENTATION OF THE MUTEX
87
===========================
89
For background, see Curt Schimmel's book on Unix implementation on modern
90
architectures. The key points in the implementation are atomicity and
91
serialization of memory accesses. The test-and-set instruction (XCHG in
92
Pentium) must be atomic. As new processors may have weak memory models, also
93
serialization of memory references may be necessary. The successor of Pentium,
94
P6, has at least one mode where the memory model is weak. As far as we know,
95
in Pentium all memory accesses are serialized in the program order and we do
96
not have to worry about the memory model. On other processors there are
97
special machine instructions called a fence, memory barrier, or storage
98
barrier (STBAR in Sparc), which can be used to serialize the memory accesses
99
to happen in program order relative to the fence instruction.
101
Leslie Lamport has devised a "bakery algorithm" to implement a mutex without
102
the atomic test-and-set, but his algorithm should be modified for weak memory
103
models. We do not use Lamport's algorithm, because we guess it is slower than
104
the atomic test-and-set.
106
Our mutex implementation works as follows: we first perform the atomic
107
test-and-set instruction on the memory word. If the test returns zero, we
108
know we got the lock first. If the test returns not zero, some other thread
109
was quicker and got the lock: then we spin in a loop reading the memory word,
110
waiting for it to become zero. It is wise to just read the word in the loop, not
111
perform numerous test-and-set instructions, because they generate memory
112
traffic between the cache and the main memory. The read loop can just access
113
the cache, saving bus bandwidth.
115
If we cannot acquire the mutex lock in the specified time, we reserve a cell
116
in the wait array, set the waiters byte in the mutex to 1. To avoid a race
117
condition, after setting the waiters byte and before suspending the waiting
118
thread, we still have to check that the mutex is reserved, because it may
119
have happened that the thread which was holding the mutex has just released
120
it and did not see the waiters byte set to 1, a case which would lead the
121
other thread to an infinite wait.
123
LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some
125
thread will eventually call os_event_set() on that particular event.
126
Thus no infinite wait is possible in this case.
128
Proof: After making the reservation the thread sets the waiters field in the
129
mutex to 1. Then it checks that the mutex is still reserved by some thread,
130
or it reserves the mutex for itself. In any case, some thread (which may be
131
also some earlier thread, not necessarily the one currently holding the mutex)
132
will set the waiters field to 0 in mutex_exit, and then call
133
os_event_set() with the mutex as an argument.
136
LEMMA 2: If an os_event_set() call is made after some thread has called
138
the os_event_reset() and before it starts wait on that event, the call
139
will not be lost to the second thread. This is true even if there is an
140
intervening call to os_event_reset() by another thread.
141
Thus no infinite wait is possible in this case.
143
Proof (non-windows platforms): os_event_reset() returns a monotonically
144
increasing value of signal_count. This value is increased at every
145
call of os_event_set(). If thread A has called os_event_reset() followed
146
by thread B calling os_event_set() and then some other thread C calling
147
os_event_reset(), the is_set flag of the event will be set to FALSE;
148
but now if thread A calls os_event_wait_low() with the signal_count
149
value returned from the earlier call of os_event_reset(), it will
150
return immediately without waiting.
153
Proof (windows): If there is a writer thread which is forced to wait for
154
the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX
155
The design of rw_lock ensures that there is one and only one thread
156
that is able to change the state to RW_LOCK_WAIT_EX and this thread is
157
guaranteed to acquire the lock after it is released by the current
158
holders and before any other waiter gets the lock.
159
On windows this thread waits on a separate event i.e.: wait_ex_event.
160
Since only one thread can wait on this event there is no chance
161
of this event getting reset before the writer starts wait on it.
162
Therefore, this thread is guaranteed to catch the os_event_set()
163
signalled unconditionally at the release of the lock.
166
/* Number of spin waits on mutexes: for performance monitoring */
168
/** The number of iterations in the mutex_spin_wait() spin loop.
169
Intended for performance monitoring. */
170
static ib_int64_t mutex_spin_round_count = 0;
171
/** The number of mutex_spin_wait() calls. Intended for
172
performance monitoring. */
173
static ib_int64_t mutex_spin_wait_count = 0;
174
/** The number of OS waits in mutex_spin_wait(). Intended for
175
performance monitoring. */
176
static ib_int64_t mutex_os_wait_count = 0;
177
/** The number of mutex_exit() calls. Intended for performance
179
UNIV_INTERN ib_int64_t mutex_exit_count = 0;
181
/** The global array of wait cells for implementation of the database's own
182
mutexes and read-write locks */
183
UNIV_INTERN sync_array_t* sync_primary_wait_array;
185
/** This variable is set to TRUE when sync_init is called */
186
UNIV_INTERN ibool sync_initialized = FALSE;
188
/** An acquired mutex or rw-lock and its level in the latching order */
189
typedef struct sync_level_struct sync_level_t;
190
/** Mutexes or rw-locks held by a thread */
191
typedef struct sync_thread_struct sync_thread_t;
193
#ifdef UNIV_SYNC_DEBUG
194
/** The latch levels currently owned by threads are stored in this data
195
structure; the size of this array is OS_THREAD_MAX_N */
197
UNIV_INTERN sync_thread_t* sync_thread_level_arrays;
199
/** Mutex protecting sync_thread_level_arrays */
200
UNIV_INTERN mutex_t sync_thread_mutex;
201
#endif /* UNIV_SYNC_DEBUG */
203
/** Global list of database mutexes (not OS mutexes) created. */
204
UNIV_INTERN ut_list_base_node_t mutex_list;
206
/** Mutex protecting the mutex_list variable */
207
UNIV_INTERN mutex_t mutex_list_mutex;
209
#ifdef UNIV_SYNC_DEBUG
210
/** Latching order checks start when this is set TRUE */
211
UNIV_INTERN ibool sync_order_checks_on = FALSE;
212
#endif /* UNIV_SYNC_DEBUG */
214
/** Mutexes or rw-locks held by a thread */
215
struct sync_thread_struct{
216
os_thread_id_t id; /*!< OS thread id */
217
sync_level_t* levels; /*!< level array for this thread; if
218
this is NULL this slot is unused */
221
/** Number of slots reserved for each OS thread in the sync level array */
222
#define SYNC_THREAD_N_LEVELS 10000
224
/** An acquired mutex or rw-lock and its level in the latching order */
225
struct sync_level_struct{
226
void* latch; /*!< pointer to a mutex or an rw-lock; NULL means that
228
ulint level; /*!< level of the latch in the latching order */
231
/******************************************************************//**
232
Creates, or rather, initializes a mutex object in a specified memory
233
location (which must be appropriately aligned). The mutex is initialized
234
in the reset state. Explicit freeing of the mutex with mutex_free is
235
necessary only if the memory block containing it is freed. */
240
mutex_t* mutex, /*!< in: pointer to memory */
242
const char* cmutex_name, /*!< in: mutex name */
243
# ifdef UNIV_SYNC_DEBUG
244
ulint level, /*!< in: level */
245
# endif /* UNIV_SYNC_DEBUG */
246
#endif /* UNIV_DEBUG */
247
const char* cfile_name, /*!< in: file name where created */
248
ulint cline) /*!< in: file line where created */
250
#if defined(HAVE_ATOMIC_BUILTINS)
251
mutex_reset_lock_word(mutex);
253
os_fast_mutex_init(&(mutex->os_fast_mutex));
254
mutex->lock_word = 0;
256
mutex->event = os_event_create(NULL);
257
mutex_set_waiters(mutex, 0);
259
mutex->magic_n = MUTEX_MAGIC_N;
260
#endif /* UNIV_DEBUG */
261
#ifdef UNIV_SYNC_DEBUG
263
mutex->file_name = "not yet reserved";
264
mutex->level = level;
265
#endif /* UNIV_SYNC_DEBUG */
266
mutex->cfile_name = cfile_name;
267
mutex->cline = cline;
268
mutex->count_os_wait = 0;
270
mutex->cmutex_name= cmutex_name;
271
mutex->count_using= 0;
272
mutex->mutex_type= 0;
273
mutex->lspent_time= 0;
274
mutex->lmax_spent_time= 0;
275
mutex->count_spin_loop= 0;
276
mutex->count_spin_rounds= 0;
277
mutex->count_os_yield= 0;
278
#endif /* UNIV_DEBUG */
280
/* Check that lock_word is aligned; this is important on Intel */
281
ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
283
/* NOTE! The very first mutexes are not put to the mutex list */
285
if ((mutex == &mutex_list_mutex)
286
#ifdef UNIV_SYNC_DEBUG
287
|| (mutex == &sync_thread_mutex)
288
#endif /* UNIV_SYNC_DEBUG */
294
mutex_enter(&mutex_list_mutex);
296
ut_ad(UT_LIST_GET_LEN(mutex_list) == 0
297
|| UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N);
299
UT_LIST_ADD_FIRST(list, mutex_list, mutex);
301
mutex_exit(&mutex_list_mutex);
304
/******************************************************************//**
305
Calling this function is obligatory only if the memory buffer containing
306
the mutex is freed. Removes a mutex object from the mutex list. The mutex
307
is checked to be in the reset state. */
312
mutex_t* mutex) /*!< in: mutex */
314
ut_ad(mutex_validate(mutex));
315
ut_a(mutex_get_lock_word(mutex) == 0);
316
ut_a(mutex_get_waiters(mutex) == 0);
318
if (mutex != &mutex_list_mutex
319
#ifdef UNIV_SYNC_DEBUG
320
&& mutex != &sync_thread_mutex
321
#endif /* UNIV_SYNC_DEBUG */
324
mutex_enter(&mutex_list_mutex);
326
ut_ad(!UT_LIST_GET_PREV(list, mutex)
327
|| UT_LIST_GET_PREV(list, mutex)->magic_n
329
ut_ad(!UT_LIST_GET_NEXT(list, mutex)
330
|| UT_LIST_GET_NEXT(list, mutex)->magic_n
333
UT_LIST_REMOVE(list, mutex_list, mutex);
335
mutex_exit(&mutex_list_mutex);
338
os_event_free(mutex->event);
340
#if !defined(HAVE_ATOMIC_BUILTINS)
341
os_fast_mutex_free(&(mutex->os_fast_mutex));
343
/* If we free the mutex protecting the mutex list (freeing is
344
not necessary), we have to reset the magic number AFTER removing
348
#endif /* UNIV_DEBUG */
351
/********************************************************************//**
352
NOTE! Use the corresponding macro in the header file, not this function
353
directly. Tries to lock the mutex for the current thread. If the lock is not
354
acquired immediately, returns with return value 1.
355
@return 0 if succeed, 1 if not */
358
mutex_enter_nowait_func(
359
/*====================*/
360
mutex_t* mutex, /*!< in: pointer to mutex */
361
const char* file_name __attribute__((unused)),
362
/*!< in: file name where mutex
364
ulint line __attribute__((unused)))
365
/*!< in: line where requested */
367
ut_ad(mutex_validate(mutex));
369
if (!mutex_test_and_set(mutex)) {
371
ut_d(mutex->thread_id = os_thread_get_curr_id());
372
#ifdef UNIV_SYNC_DEBUG
373
mutex_set_debug_info(mutex, file_name, line);
376
return(0); /* Succeeded! */
383
/******************************************************************//**
384
Checks that the mutex has been initialized.
390
const mutex_t* mutex) /*!< in: mutex */
393
ut_a(mutex->magic_n == MUTEX_MAGIC_N);
398
/******************************************************************//**
399
Checks that the current thread owns the mutex. Works only in the debug
401
@return TRUE if owns */
406
const mutex_t* mutex) /*!< in: mutex */
408
ut_ad(mutex_validate(mutex));
410
return(mutex_get_lock_word(mutex) == 1
411
&& os_thread_eq(mutex->thread_id, os_thread_get_curr_id()));
413
#endif /* UNIV_DEBUG */
415
/******************************************************************//**
416
Sets the waiters field in a mutex. */
421
mutex_t* mutex, /*!< in: mutex */
422
ulint n) /*!< in: value to set */
424
volatile ulint* ptr; /* declared volatile to ensure that
425
the value is stored to memory */
428
ptr = &(mutex->waiters);
430
*ptr = n; /* Here we assume that the write of a single
431
word in memory is atomic */
434
/******************************************************************//**
435
Reserves a mutex for the current thread. If the mutex is reserved, the
436
function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
437
for the mutex before suspending the thread. */
442
mutex_t* mutex, /*!< in: pointer to mutex */
443
const char* file_name, /*!< in: file name where mutex
445
ulint line) /*!< in: line where requested */
447
ulint index; /* index of the reserved wait cell */
448
ulint i; /* spin round count */
450
ib_int64_t lstart_time = 0, lfinish_time; /* for timing os_wait */
454
uint timer_started = 0;
455
#endif /* UNIV_DEBUG */
458
/* This update is not thread safe, but we don't mind if the count
459
isn't exact. Moved out of ifdef that follows because we are willing
460
to sacrifice the cost of counting this as the data is valuable.
461
Count the number of calls to mutex_spin_wait. */
462
mutex_spin_wait_count++;
468
/* Spin waiting for the lock word to become zero. Note that we do
469
not have to assume that the read access to the lock word is atomic,
470
as the actual locking is always committed with atomic test-and-set.
471
In reality, however, all processors probably have an atomic read of
475
ut_d(mutex->count_spin_loop++);
477
while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
478
if (srv_spin_wait_delay) {
479
ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
485
if (i == SYNC_SPIN_ROUNDS) {
487
mutex->count_os_yield++;
488
#ifndef UNIV_HOTBACKUP
489
if (timed_mutexes && timer_started == 0) {
490
ut_usectime(&sec, &ms);
491
lstart_time= (ib_int64_t)sec * 1000000 + ms;
494
#endif /* UNIV_HOTBACKUP */
495
#endif /* UNIV_DEBUG */
499
#ifdef UNIV_SRV_PRINT_LATCH_WAITS
501
"Thread %lu spin wait mutex at %p"
502
" cfile %s cline %lu rnds %lu\n",
503
(ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
504
mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
507
mutex_spin_round_count += i;
509
ut_d(mutex->count_spin_rounds += i);
511
if (mutex_test_and_set(mutex) == 0) {
514
ut_d(mutex->thread_id = os_thread_get_curr_id());
515
#ifdef UNIV_SYNC_DEBUG
516
mutex_set_debug_info(mutex, file_name, line);
522
/* We may end up with a situation where lock_word is 0 but the OS
523
fast mutex is still reserved. On FreeBSD the OS does not seem to
524
schedule a thread which is constantly calling pthread_mutex_trylock
525
(in mutex_test_and_set implementation). Then we could end up
526
spinning here indefinitely. The following 'i++' stops this infinite
531
if (i < SYNC_SPIN_ROUNDS) {
535
sync_array_reserve_cell(sync_primary_wait_array, mutex,
536
SYNC_MUTEX, file_name, line, &index);
538
/* The memory order of the array reservation and the change in the
539
waiters field is important: when we suspend a thread, we first
540
reserve the cell and then set waiters field to 1. When threads are
541
released in mutex_exit, the waiters field is first set to zero and
542
then the event is set to the signaled state. */
544
mutex_set_waiters(mutex, 1);
546
/* Try to reserve still a few times */
547
for (i = 0; i < 4; i++) {
548
if (mutex_test_and_set(mutex) == 0) {
549
/* Succeeded! Free the reserved wait cell */
551
sync_array_free_cell(sync_primary_wait_array, index);
553
ut_d(mutex->thread_id = os_thread_get_curr_id());
554
#ifdef UNIV_SYNC_DEBUG
555
mutex_set_debug_info(mutex, file_name, line);
558
#ifdef UNIV_SRV_PRINT_LATCH_WAITS
559
fprintf(stderr, "Thread %lu spin wait succeeds at 2:"
561
(ulong) os_thread_pf(os_thread_get_curr_id()),
567
/* Note that in this case we leave the waiters field
568
set to 1. We cannot reset it to zero, as we do not
569
know if there are other waiters. */
573
/* Now we know that there has been some thread holding the mutex
574
after the change in the wait array and the waiters field was made.
575
Now there is no risk of infinite wait on the event. */
577
#ifdef UNIV_SRV_PRINT_LATCH_WAITS
579
"Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n",
580
(ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
581
mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
584
mutex_os_wait_count++;
586
mutex->count_os_wait++;
588
/* !!!!! Sometimes os_wait can be called without os_thread_yield */
589
#ifndef UNIV_HOTBACKUP
590
if (timed_mutexes == 1 && timer_started == 0) {
591
ut_usectime(&sec, &ms);
592
lstart_time= (ib_int64_t)sec * 1000000 + ms;
595
#endif /* UNIV_HOTBACKUP */
596
#endif /* UNIV_DEBUG */
598
sync_array_wait_event(sync_primary_wait_array, index);
603
if (timed_mutexes == 1 && timer_started==1) {
604
ut_usectime(&sec, &ms);
605
lfinish_time= (ib_int64_t)sec * 1000000 + ms;
607
ltime_diff= (ulint) (lfinish_time - lstart_time);
608
mutex->lspent_time += ltime_diff;
610
if (mutex->lmax_spent_time < ltime_diff) {
611
mutex->lmax_spent_time= ltime_diff;
614
#endif /* UNIV_DEBUG */
618
/******************************************************************//**
619
Releases the threads waiting in the primary wait array for this mutex. */
624
mutex_t* mutex) /*!< in: mutex */
626
mutex_set_waiters(mutex, 0);
628
/* The memory order of resetting the waiters field and
629
signaling the object is important. See LEMMA 1 above. */
630
os_event_set(mutex->event);
631
sync_array_object_signalled(sync_primary_wait_array);
634
#ifdef UNIV_SYNC_DEBUG
635
/******************************************************************//**
636
Sets the debug information for a reserved mutex. */
639
mutex_set_debug_info(
640
/*=================*/
641
mutex_t* mutex, /*!< in: mutex */
642
const char* file_name, /*!< in: file where requested */
643
ulint line) /*!< in: line where requested */
648
sync_thread_add_level(mutex, mutex->level);
650
mutex->file_name = file_name;
654
/******************************************************************//**
655
Gets the debug information for a reserved mutex. */
658
mutex_get_debug_info(
659
/*=================*/
660
mutex_t* mutex, /*!< in: mutex */
661
const char** file_name, /*!< out: file where requested */
662
ulint* line, /*!< out: line where requested */
663
os_thread_id_t* thread_id) /*!< out: id of the thread which owns
668
*file_name = mutex->file_name;
670
*thread_id = mutex->thread_id;
673
/******************************************************************//**
674
Prints debug info of currently reserved mutexes. */
677
mutex_list_print_info(
678
/*==================*/
679
FILE* file) /*!< in: file where to print */
682
const char* file_name;
684
os_thread_id_t thread_id;
689
"----------\n", file);
691
mutex_enter(&mutex_list_mutex);
693
mutex = UT_LIST_GET_FIRST(mutex_list);
695
while (mutex != NULL) {
698
if (mutex_get_lock_word(mutex) != 0) {
699
mutex_get_debug_info(mutex, &file_name, &line,
702
"Locked mutex: addr %p thread %ld"
703
" file %s line %ld\n",
704
(void*) mutex, os_thread_pf(thread_id),
708
mutex = UT_LIST_GET_NEXT(list, mutex);
711
fprintf(file, "Total number of mutexes %ld\n", count);
713
mutex_exit(&mutex_list_mutex);
716
/******************************************************************//**
717
Counts currently reserved mutexes. Works only in the debug version.
718
@return number of reserved mutexes */
721
mutex_n_reserved(void)
722
/*==================*/
727
mutex_enter(&mutex_list_mutex);
729
mutex = UT_LIST_GET_FIRST(mutex_list);
731
while (mutex != NULL) {
732
if (mutex_get_lock_word(mutex) != 0) {
737
mutex = UT_LIST_GET_NEXT(list, mutex);
740
mutex_exit(&mutex_list_mutex);
744
return(count - 1); /* Subtract one, because this function itself
745
was holding one mutex (mutex_list_mutex) */
748
/******************************************************************//**
749
Returns TRUE if no mutex or rw-lock is currently locked. Works only in
751
@return TRUE if no mutexes and rw-locks reserved */
757
return(mutex_n_reserved() + rw_lock_n_locked() == 0);
760
/******************************************************************//**
761
Gets the value in the nth slot in the thread level arrays.
762
@return pointer to thread slot */
765
sync_thread_level_arrays_get_nth(
766
/*=============================*/
767
ulint n) /*!< in: slot number */
769
ut_ad(n < OS_THREAD_MAX_N);
771
return(sync_thread_level_arrays + n);
774
/******************************************************************//**
775
Looks for the thread slot for the calling thread.
776
@return pointer to thread slot, NULL if not found */
779
sync_thread_level_arrays_find_slot(void)
780
/*====================================*/
787
id = os_thread_get_curr_id();
789
for (i = 0; i < OS_THREAD_MAX_N; i++) {
791
slot = sync_thread_level_arrays_get_nth(i);
793
if (slot->levels && os_thread_eq(slot->id, id)) {
802
/******************************************************************//**
803
Looks for an unused thread slot.
804
@return pointer to thread slot */
807
sync_thread_level_arrays_find_free(void)
808
/*====================================*/
814
for (i = 0; i < OS_THREAD_MAX_N; i++) {
816
slot = sync_thread_level_arrays_get_nth(i);
818
if (slot->levels == NULL) {
827
/******************************************************************//**
828
Gets the value in the nth slot in the thread level array.
829
@return pointer to level slot */
832
sync_thread_levels_get_nth(
833
/*=======================*/
834
sync_level_t* arr, /*!< in: pointer to level array for an OS
836
ulint n) /*!< in: slot number */
838
ut_ad(n < SYNC_THREAD_N_LEVELS);
843
/******************************************************************//**
844
Checks if all the level values stored in the level array are greater than
846
@return TRUE if all greater */
849
sync_thread_levels_g(
850
/*=================*/
851
sync_level_t* arr, /*!< in: pointer to level array for an OS
853
ulint limit, /*!< in: level limit */
854
ulint warn) /*!< in: TRUE=display a diagnostic message */
861
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
863
slot = sync_thread_levels_get_nth(arr, i);
865
if (slot->latch != NULL) {
866
if (slot->level <= limit) {
877
"InnoDB: sync levels should be"
878
" > %lu but a level is %lu\n",
879
(ulong) limit, (ulong) slot->level);
881
if (mutex->magic_n == MUTEX_MAGIC_N) {
883
"Mutex created at %s %lu\n",
885
(ulong) mutex->cline);
887
if (mutex_get_lock_word(mutex) != 0) {
888
const char* file_name;
890
os_thread_id_t thread_id;
892
mutex_get_debug_info(
897
"InnoDB: Locked mutex:"
898
" addr %p thread %ld"
899
" file %s line %ld\n",
906
fputs("Not locked\n", stderr);
920
/******************************************************************//**
921
Checks if the level value is stored in the level array.
922
@return TRUE if stored */
925
sync_thread_levels_contain(
926
/*=======================*/
927
sync_level_t* arr, /*!< in: pointer to level array for an OS
929
ulint level) /*!< in: level */
934
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
936
slot = sync_thread_levels_get_nth(arr, i);
938
if (slot->latch != NULL) {
939
if (slot->level == level) {
949
/******************************************************************//**
950
Checks that the level array for the current thread is empty.
951
@return TRUE if empty except the exceptions specified below */
954
sync_thread_levels_empty_gen(
955
/*=========================*/
956
ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is
957
allowed to be owned by the thread,
958
also purge_is_running mutex is
962
sync_thread_t* thread_slot;
966
if (!sync_order_checks_on) {
971
mutex_enter(&sync_thread_mutex);
973
thread_slot = sync_thread_level_arrays_find_slot();
975
if (thread_slot == NULL) {
977
mutex_exit(&sync_thread_mutex);
982
arr = thread_slot->levels;
984
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
986
slot = sync_thread_levels_get_nth(arr, i);
988
if (slot->latch != NULL
989
&& (!dict_mutex_allowed
990
|| (slot->level != SYNC_DICT
991
&& slot->level != SYNC_DICT_OPERATION))) {
993
mutex_exit(&sync_thread_mutex);
1000
mutex_exit(&sync_thread_mutex);
1005
/******************************************************************//**
1006
Checks that the level array for the current thread is empty.
1007
@return TRUE if empty */
1010
sync_thread_levels_empty(void)
1011
/*==========================*/
1013
return(sync_thread_levels_empty_gen(FALSE));
1016
/******************************************************************//**
1017
Adds a latch and its level in the thread level array. Allocates the memory
1018
for the array if called first time for this OS thread. Makes the checks
1019
against other latch levels stored in the array for this thread. */
1022
sync_thread_add_level(
1023
/*==================*/
1024
void* latch, /*!< in: pointer to a mutex or an rw-lock */
1025
ulint level) /*!< in: level in the latching order; if
1026
SYNC_LEVEL_VARYING, nothing is done */
1028
sync_level_t* array;
1030
sync_thread_t* thread_slot;
1033
if (!sync_order_checks_on) {
1038
if ((latch == (void*)&sync_thread_mutex)
1039
|| (latch == (void*)&mutex_list_mutex)
1040
|| (latch == (void*)&rw_lock_debug_mutex)
1041
|| (latch == (void*)&rw_lock_list_mutex)) {
1046
if (level == SYNC_LEVEL_VARYING) {
1051
mutex_enter(&sync_thread_mutex);
1053
thread_slot = sync_thread_level_arrays_find_slot();
1055
if (thread_slot == NULL) {
1056
/* We have to allocate the level array for a new thread */
1057
array = ut_malloc(sizeof(sync_level_t) * SYNC_THREAD_N_LEVELS);
1059
thread_slot = sync_thread_level_arrays_find_free();
1061
thread_slot->id = os_thread_get_curr_id();
1062
thread_slot->levels = array;
1064
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
1066
slot = sync_thread_levels_get_nth(array, i);
1072
array = thread_slot->levels;
1074
/* NOTE that there is a problem with _NODE and _LEAF levels: if the
1075
B-tree height changes, then a leaf can change to an internal node
1076
or the other way around. We do not know at present if this can cause
1077
unnecessary assertion failures below. */
1080
case SYNC_NO_ORDER_CHECK:
1081
case SYNC_EXTERN_STORAGE:
1082
case SYNC_TREE_NODE_FROM_HASH:
1083
/* Do no order checking */
1088
case SYNC_WORK_QUEUE:
1090
case SYNC_THR_LOCAL:
1091
case SYNC_ANY_LATCH:
1092
case SYNC_TRX_SYS_HEADER:
1093
case SYNC_FILE_FORMAT_TAG:
1094
case SYNC_DOUBLEWRITE:
1096
case SYNC_SEARCH_SYS:
1097
case SYNC_SEARCH_SYS_CONF:
1098
case SYNC_TRX_LOCK_HEAP:
1100
case SYNC_IBUF_BITMAP_MUTEX:
1103
case SYNC_PURGE_LATCH:
1104
case SYNC_PURGE_SYS:
1105
case SYNC_DICT_AUTOINC_MUTEX:
1106
case SYNC_DICT_OPERATION:
1107
case SYNC_DICT_HEADER:
1108
case SYNC_TRX_I_S_RWLOCK:
1109
case SYNC_TRX_I_S_LAST_READ:
1110
if (!sync_thread_levels_g(array, level, TRUE)) {
1112
"InnoDB: sync_thread_levels_g(array, %lu)"
1113
" does not hold!\n", level);
1117
case SYNC_BUF_BLOCK:
1118
/* Either the thread must own the buffer pool mutex
1119
(buf_pool_mutex), or it is allowed to latch only ONE
1120
buffer block (block->mutex or buf_pool_zip_mutex). */
1121
if (!sync_thread_levels_g(array, level, FALSE)) {
1122
ut_a(sync_thread_levels_g(array, level - 1, TRUE));
1123
ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
1127
if (sync_thread_levels_contain(array, SYNC_KERNEL)) {
1128
ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK - 1,
1131
ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK, TRUE));
1134
case SYNC_IBUF_BITMAP:
1135
/* Either the thread must own the master mutex to all
1136
the bitmap pages, or it is allowed to latch only ONE
1138
if (sync_thread_levels_contain(array,
1139
SYNC_IBUF_BITMAP_MUTEX)) {
1140
ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1,
1143
ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP,
1148
ut_a(sync_thread_levels_contain(array, SYNC_FSP));
1151
ut_a(sync_thread_levels_contain(array, SYNC_FSP)
1152
|| sync_thread_levels_g(array, SYNC_FSP, TRUE));
1154
case SYNC_TRX_UNDO_PAGE:
1155
ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
1156
|| sync_thread_levels_contain(array, SYNC_RSEG)
1157
|| sync_thread_levels_contain(array, SYNC_PURGE_SYS)
1158
|| sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE, TRUE));
1160
case SYNC_RSEG_HEADER:
1161
ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
1163
case SYNC_RSEG_HEADER_NEW:
1164
ut_a(sync_thread_levels_contain(array, SYNC_KERNEL)
1165
&& sync_thread_levels_contain(array, SYNC_FSP_PAGE));
1167
case SYNC_TREE_NODE:
1168
ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
1169
|| sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
1170
|| sync_thread_levels_g(array, SYNC_TREE_NODE - 1, TRUE));
1172
case SYNC_TREE_NODE_NEW:
1173
ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE)
1174
|| sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
1176
case SYNC_INDEX_TREE:
1177
if (sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
1178
&& sync_thread_levels_contain(array, SYNC_FSP)) {
1179
ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1,
1182
ut_a(sync_thread_levels_g(array, SYNC_TREE_NODE - 1,
1186
case SYNC_IBUF_MUTEX:
1187
ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1, TRUE));
1189
case SYNC_IBUF_PESS_INSERT_MUTEX:
1190
ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
1191
ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
1193
case SYNC_IBUF_HEADER:
1194
ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
1195
ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
1196
ut_a(!sync_thread_levels_contain(array,
1197
SYNC_IBUF_PESS_INSERT_MUTEX));
1201
ut_a(buf_debug_prints
1202
|| sync_thread_levels_g(array, SYNC_DICT, TRUE));
1203
#else /* UNIV_DEBUG */
1204
ut_a(sync_thread_levels_g(array, SYNC_DICT, TRUE));
1205
#endif /* UNIV_DEBUG */
1211
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
1213
slot = sync_thread_levels_get_nth(array, i);
1215
if (slot->latch == NULL) {
1216
slot->latch = latch;
1217
slot->level = level;
1223
ut_a(i < SYNC_THREAD_N_LEVELS);
1225
mutex_exit(&sync_thread_mutex);
1228
/******************************************************************//**
1229
Removes a latch from the thread level array if it is found there.
1230
@return TRUE if found in the array; it is no error if the latch is
1231
not found, as we presently are not able to determine the level for
1232
every latch reservation the program does */
1235
sync_thread_reset_level(
1236
/*====================*/
1237
void* latch) /*!< in: pointer to a mutex or an rw-lock */
1239
sync_level_t* array;
1241
sync_thread_t* thread_slot;
1244
if (!sync_order_checks_on) {
1249
if ((latch == (void*)&sync_thread_mutex)
1250
|| (latch == (void*)&mutex_list_mutex)
1251
|| (latch == (void*)&rw_lock_debug_mutex)
1252
|| (latch == (void*)&rw_lock_list_mutex)) {
1257
mutex_enter(&sync_thread_mutex);
1259
thread_slot = sync_thread_level_arrays_find_slot();
1261
if (thread_slot == NULL) {
1265
mutex_exit(&sync_thread_mutex);
1269
array = thread_slot->levels;
1271
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
1273
slot = sync_thread_levels_get_nth(array, i);
1275
if (slot->latch == latch) {
1278
mutex_exit(&sync_thread_mutex);
1284
if (((mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
1287
rw_lock = (rw_lock_t*) latch;
1289
if (rw_lock->level == SYNC_LEVEL_VARYING) {
1290
mutex_exit(&sync_thread_mutex);
1298
mutex_exit(&sync_thread_mutex);
1302
#endif /* UNIV_SYNC_DEBUG */
1304
/******************************************************************//**
1305
Initializes the synchronization data structures. */
1311
#ifdef UNIV_SYNC_DEBUG
1312
sync_thread_t* thread_slot;
1314
#endif /* UNIV_SYNC_DEBUG */
1316
ut_a(sync_initialized == FALSE);
1318
sync_initialized = TRUE;
1320
/* Create the primary system wait array which is protected by an OS
1323
sync_primary_wait_array = sync_array_create(OS_THREAD_MAX_N,
1324
SYNC_ARRAY_OS_MUTEX);
1325
#ifdef UNIV_SYNC_DEBUG
1326
/* Create the thread latch level array where the latch levels
1327
are stored for each OS thread */
1329
sync_thread_level_arrays = ut_malloc(OS_THREAD_MAX_N
1330
* sizeof(sync_thread_t));
1331
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1333
thread_slot = sync_thread_level_arrays_get_nth(i);
1334
thread_slot->levels = NULL;
1336
#endif /* UNIV_SYNC_DEBUG */
1337
/* Init the mutex list and create the mutex to protect it. */
1339
UT_LIST_INIT(mutex_list);
1340
mutex_create(&mutex_list_mutex, SYNC_NO_ORDER_CHECK);
1341
#ifdef UNIV_SYNC_DEBUG
1342
mutex_create(&sync_thread_mutex, SYNC_NO_ORDER_CHECK);
1343
#endif /* UNIV_SYNC_DEBUG */
1345
/* Init the rw-lock list and create the mutex to protect it. */
1347
UT_LIST_INIT(rw_lock_list);
1348
mutex_create(&rw_lock_list_mutex, SYNC_NO_ORDER_CHECK);
1350
#ifdef UNIV_SYNC_DEBUG
1351
mutex_create(&rw_lock_debug_mutex, SYNC_NO_ORDER_CHECK);
1353
rw_lock_debug_event = os_event_create(NULL);
1354
rw_lock_debug_waiters = FALSE;
1355
#endif /* UNIV_SYNC_DEBUG */
1358
/******************************************************************//**
1359
Frees the resources in InnoDB's own synchronization data structures. Use
1360
os_sync_free() after calling this. */
1368
sync_array_free(sync_primary_wait_array);
1370
mutex = UT_LIST_GET_FIRST(mutex_list);
1374
mutex = UT_LIST_GET_FIRST(mutex_list);
1377
mutex_free(&mutex_list_mutex);
1378
#ifdef UNIV_SYNC_DEBUG
1379
mutex_free(&sync_thread_mutex);
1381
/* Switch latching order checks on in sync0sync.c */
1382
sync_order_checks_on = FALSE;
1383
#endif /* UNIV_SYNC_DEBUG */
1385
sync_initialized = FALSE;
1388
/*******************************************************************//**
1389
Prints wait info of the sync system. */
1392
sync_print_wait_info(
1393
/*=================*/
1394
FILE* file) /*!< in: file where to print */
1396
#ifdef UNIV_SYNC_DEBUG
1397
fprintf(file, "Mutex exits %llu, rws exits %llu, rwx exits %llu\n",
1398
mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
1402
"Mutex spin waits %llu, rounds %llu, OS waits %llu\n"
1403
"RW-shared spins %llu, OS waits %llu;"
1404
" RW-excl spins %llu, OS waits %llu\n",
1405
mutex_spin_wait_count,
1406
mutex_spin_round_count,
1407
mutex_os_wait_count,
1408
rw_s_spin_wait_count,
1410
rw_x_spin_wait_count,
1411
rw_x_os_wait_count);
1414
"Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
1416
(double) mutex_spin_round_count /
1417
(mutex_spin_wait_count ? mutex_spin_wait_count : 1),
1418
(double) rw_s_spin_round_count /
1419
(rw_s_spin_wait_count ? rw_s_spin_wait_count : 1),
1420
(double) rw_x_spin_round_count /
1421
(rw_x_spin_wait_count ? rw_x_spin_wait_count : 1));
1424
/*******************************************************************//**
1425
Prints info of the sync system. */
1430
FILE* file) /*!< in: file where to print */
1432
#ifdef UNIV_SYNC_DEBUG
1433
mutex_list_print_info(file);
1435
rw_lock_list_print_info(file);
1436
#endif /* UNIV_SYNC_DEBUG */
1438
sync_array_print_info(file, sync_primary_wait_array);
1440
sync_print_wait_info(file);