1
/*****************************************************************************
3
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
4
Copyright (c) 2008, Google Inc.
6
Portions of this file contain modifications contributed and copyrighted by
7
Google, Inc. Those modifications are gratefully acknowledged and are described
8
briefly in the InnoDB documentation. The contributions by Google are
9
incorporated with their permission, and subject to the conditions contained in
10
the file COPYING.Google.
12
This program is free software; you can redistribute it and/or modify it under
13
the terms of the GNU General Public License as published by the Free Software
14
Foundation; version 2 of the License.
16
This program is distributed in the hope that it will be useful, but WITHOUT
17
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20
You should have received a copy of the GNU General Public License along with
21
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
22
Place, Suite 330, Boston, MA 02111-1307 USA
24
*****************************************************************************/
26
/**************************************************//**
27
@file sync/sync0sync.c
28
Mutex, the basic synchronization primitive
30
Created 9/5/1995 Heikki Tuuri
31
*******************************************************/
33
#include "sync0sync.h"
35
#include "sync0sync.ic"
41
#include "buf0types.h"
44
REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX
45
============================================
47
Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc
48
takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995
49
Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to
50
implement our own efficient spin lock mutex. Future operating systems may
51
provide efficient spin locks, but we cannot count on that.
53
Another reason for implementing a spin lock is that on multiprocessor systems
54
it can be more efficient for a processor to run a loop waiting for the
55
semaphore to be released than to switch to a different thread. A thread switch
56
takes 25 us on both platforms mentioned above. See Gray and Reuter's book
57
Transaction processing for background.
59
How long should the spin loop last before suspending the thread? On a
60
uniprocessor, spinning does not help at all, because if the thread owning the
61
mutex is not executing, it cannot be released. Spinning actually wastes
64
On a multiprocessor, we do not know if the thread owning the mutex is
65
executing or not. Thus it would make sense to spin as long as the operation
66
guarded by the mutex would typically last assuming that the thread is
67
executing. If the mutex is not released by that time, we may assume that the
68
thread owning the mutex is not executing and suspend the waiting thread.
70
A typical operation (where no i/o is involved) guarded by a mutex or a read-write
71
lock may last 1 - 20 us on the current Pentium platform. The longest
72
operations are the binary searches on an index node.
74
We conclude that the best choice is to set the spin time at 20 us. Then the
75
system should work well on a multiprocessor. On a uniprocessor we have to
76
make sure that thread switches due to mutex collisions are not frequent,
77
i.e., they do not happen every 100 us or so, because that wastes too much
78
resources. If the thread switches are not frequent, the 20 us wasted in spin
81
Empirical studies on the effect of spin time should be done for different
85
IMPLEMENTATION OF THE MUTEX
86
===========================
88
For background, see Curt Schimmel's book on Unix implementation on modern
89
architectures. The key points in the implementation are atomicity and
90
serialization of memory accesses. The test-and-set instruction (XCHG in
91
Pentium) must be atomic. As new processors may have weak memory models, also
92
serialization of memory references may be necessary. The successor of Pentium,
93
P6, has at least one mode where the memory model is weak. As far as we know,
94
in Pentium all memory accesses are serialized in the program order and we do
95
not have to worry about the memory model. On other processors there are
96
special machine instructions called a fence, memory barrier, or storage
97
barrier (STBAR in Sparc), which can be used to serialize the memory accesses
98
to happen in program order relative to the fence instruction.
100
Leslie Lamport has devised a "bakery algorithm" to implement a mutex without
101
the atomic test-and-set, but his algorithm should be modified for weak memory
102
models. We do not use Lamport's algorithm, because we guess it is slower than
103
the atomic test-and-set.
105
Our mutex implementation works as follows: After that we perform the atomic
106
test-and-set instruction on the memory word. If the test returns zero, we
107
know we got the lock first. If the test returns not zero, some other thread
108
was quicker and got the lock: then we spin in a loop reading the memory word,
109
waiting for it to become zero. It is wise to just read the word in the loop, not
110
perform numerous test-and-set instructions, because they generate memory
111
traffic between the cache and the main memory. The read loop can just access
112
the cache, saving bus bandwidth.
114
If we cannot acquire the mutex lock in the specified time, we reserve a cell
115
in the wait array, set the waiters byte in the mutex to 1. To avoid a race
116
condition, after setting the waiters byte and before suspending the waiting
117
thread, we still have to check that the mutex is reserved, because it may
118
have happened that the thread which was holding the mutex has just released
119
it and did not see the waiters byte set to 1, a case which would lead the
120
other thread to an infinite wait.
122
LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some
124
thread will eventually call os_event_set() on that particular event.
125
Thus no infinite wait is possible in this case.
127
Proof: After making the reservation the thread sets the waiters field in the
128
mutex to 1. Then it checks that the mutex is still reserved by some thread,
129
or it reserves the mutex for itself. In any case, some thread (which may be
130
also some earlier thread, not necessarily the one currently holding the mutex)
131
will set the waiters field to 0 in mutex_exit, and then call
132
os_event_set() with the mutex as an argument.
135
LEMMA 2: If an os_event_set() call is made after some thread has called
137
the os_event_reset() and before it starts wait on that event, the call
138
will not be lost to the second thread. This is true even if there is an
139
intervening call to os_event_reset() by another thread.
140
Thus no infinite wait is possible in this case.
142
Proof (non-windows platforms): os_event_reset() returns a monotonically
143
increasing value of signal_count. This value is increased at every
144
call of os_event_set() If thread A has called os_event_reset() followed
145
by thread B calling os_event_set() and then some other thread C calling
146
os_event_reset(), the is_set flag of the event will be set to FALSE;
147
but now if thread A calls os_event_wait_low() with the signal_count
148
value returned from the earlier call of os_event_reset(), it will
149
return immediately without waiting.
152
Proof (windows): If there is a writer thread which is forced to wait for
153
the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX
154
The design of rw_lock ensures that there is one and only one thread
155
that is able to change the state to RW_LOCK_WAIT_EX and this thread is
156
guaranteed to acquire the lock after it is released by the current
157
holders and before any other waiter gets the lock.
158
On windows this thread waits on a separate event i.e.: wait_ex_event.
159
Since only one thread can wait on this event there is no chance
160
of this event getting reset before the writer starts wait on it.
161
Therefore, this thread is guaranteed to catch the os_event_set()
162
signalled unconditionally at the release of the lock.
165
/* Number of spin waits on mutexes: for performance monitoring */
167
/** The number of iterations in the mutex_spin_wait() spin loop.
168
Intended for performance monitoring. */
169
static ib_int64_t mutex_spin_round_count = 0;
170
/** The number of mutex_spin_wait() calls. Intended for
171
performance monitoring. */
172
static ib_int64_t mutex_spin_wait_count = 0;
173
/** The number of OS waits in mutex_spin_wait(). Intended for
174
performance monitoring. */
175
static ib_int64_t mutex_os_wait_count = 0;
176
/** The number of mutex_exit() calls. Intended for performance
178
UNIV_INTERN ib_int64_t mutex_exit_count = 0;
180
/** The global array of wait cells for implementation of the database's own
181
mutexes and read-write locks */
182
UNIV_INTERN sync_array_t* sync_primary_wait_array;
184
/** This variable is set to TRUE when sync_init is called */
185
UNIV_INTERN ibool sync_initialized = FALSE;
187
/** An acquired mutex or rw-lock and its level in the latching order */
188
typedef struct sync_level_struct sync_level_t;
189
/** Mutexes or rw-locks held by a thread */
190
typedef struct sync_thread_struct sync_thread_t;
192
#ifdef UNIV_SYNC_DEBUG
193
/** The latch levels currently owned by threads are stored in this data
194
structure; the size of this array is OS_THREAD_MAX_N */
196
UNIV_INTERN sync_thread_t* sync_thread_level_arrays;
198
/** Mutex protecting sync_thread_level_arrays */
199
UNIV_INTERN mutex_t sync_thread_mutex;
200
#endif /* UNIV_SYNC_DEBUG */
202
/** Global list of database mutexes (not OS mutexes) created. */
203
UNIV_INTERN ut_list_base_node_t mutex_list;
205
/** Mutex protecting the mutex_list variable */
206
UNIV_INTERN mutex_t mutex_list_mutex;
208
#ifdef UNIV_SYNC_DEBUG
209
/** Latching order checks start when this is set TRUE */
210
UNIV_INTERN ibool sync_order_checks_on = FALSE;
211
#endif /* UNIV_SYNC_DEBUG */
213
/** Mutexes or rw-locks held by a thread */
214
struct sync_thread_struct{
215
os_thread_id_t id; /*!< OS thread id */
216
sync_level_t* levels; /*!< level array for this thread; if
217
this is NULL this slot is unused */
220
/** Number of slots reserved for each OS thread in the sync level array */
221
#define SYNC_THREAD_N_LEVELS 10000
223
/** An acquired mutex or rw-lock and its level in the latching order */
224
struct sync_level_struct{
225
void* latch; /*!< pointer to a mutex or an rw-lock; NULL means that
227
ulint level; /*!< level of the latch in the latching order */
230
/******************************************************************//**
231
Creates, or rather, initializes a mutex object in a specified memory
232
location (which must be appropriately aligned). The mutex is initialized
233
in the reset state. Explicit freeing of the mutex with mutex_free is
234
necessary only if the memory block containing it is freed. */
239
mutex_t* mutex, /*!< in: pointer to memory */
241
const char* cmutex_name, /*!< in: mutex name */
242
# ifdef UNIV_SYNC_DEBUG
243
ulint level, /*!< in: level */
244
# endif /* UNIV_SYNC_DEBUG */
245
#endif /* UNIV_DEBUG */
246
const char* cfile_name, /*!< in: file name where created */
247
ulint cline) /*!< in: file line where created */
249
#if defined(HAVE_ATOMIC_BUILTINS)
250
mutex_reset_lock_word(mutex);
252
os_fast_mutex_init(&(mutex->os_fast_mutex));
253
mutex->lock_word = 0;
255
mutex->event = os_event_create(NULL);
256
mutex_set_waiters(mutex, 0);
258
mutex->magic_n = MUTEX_MAGIC_N;
259
#endif /* UNIV_DEBUG */
260
#ifdef UNIV_SYNC_DEBUG
262
mutex->file_name = "not yet reserved";
263
mutex->level = level;
264
#endif /* UNIV_SYNC_DEBUG */
265
mutex->cfile_name = cfile_name;
266
mutex->cline = cline;
267
mutex->count_os_wait = 0;
269
mutex->cmutex_name= cmutex_name;
270
mutex->count_using= 0;
271
mutex->mutex_type= 0;
272
mutex->lspent_time= 0;
273
mutex->lmax_spent_time= 0;
274
mutex->count_spin_loop= 0;
275
mutex->count_spin_rounds= 0;
276
mutex->count_os_yield= 0;
277
#endif /* UNIV_DEBUG */
279
/* Check that lock_word is aligned; this is important on Intel */
280
ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
282
/* NOTE! The very first mutexes are not put to the mutex list */
284
if ((mutex == &mutex_list_mutex)
285
#ifdef UNIV_SYNC_DEBUG
286
|| (mutex == &sync_thread_mutex)
287
#endif /* UNIV_SYNC_DEBUG */
293
mutex_enter(&mutex_list_mutex);
295
ut_ad(UT_LIST_GET_LEN(mutex_list) == 0
296
|| UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N);
298
UT_LIST_ADD_FIRST(list, mutex_list, mutex);
300
mutex_exit(&mutex_list_mutex);
303
/******************************************************************//**
304
Calling this function is obligatory only if the memory buffer containing
305
the mutex is freed. Removes a mutex object from the mutex list. The mutex
306
is checked to be in the reset state. */
311
mutex_t* mutex) /*!< in: mutex */
313
ut_ad(mutex_validate(mutex));
314
ut_a(mutex_get_lock_word(mutex) == 0);
315
ut_a(mutex_get_waiters(mutex) == 0);
317
if (mutex != &mutex_list_mutex
318
#ifdef UNIV_SYNC_DEBUG
319
&& mutex != &sync_thread_mutex
320
#endif /* UNIV_SYNC_DEBUG */
323
mutex_enter(&mutex_list_mutex);
325
ut_ad(!UT_LIST_GET_PREV(list, mutex)
326
|| UT_LIST_GET_PREV(list, mutex)->magic_n
328
ut_ad(!UT_LIST_GET_NEXT(list, mutex)
329
|| UT_LIST_GET_NEXT(list, mutex)->magic_n
332
UT_LIST_REMOVE(list, mutex_list, mutex);
334
mutex_exit(&mutex_list_mutex);
337
os_event_free(mutex->event);
339
#if !defined(HAVE_ATOMIC_BUILTINS)
340
os_fast_mutex_free(&(mutex->os_fast_mutex));
342
/* If we free the mutex protecting the mutex list (freeing is
343
not necessary), we have to reset the magic number AFTER removing
347
#endif /* UNIV_DEBUG */
350
/********************************************************************//**
351
NOTE! Use the corresponding macro in the header file, not this function
352
directly. Tries to lock the mutex for the current thread. If the lock is not
353
acquired immediately, returns with return value 1.
354
@return 0 if succeed, 1 if not */
357
mutex_enter_nowait_func(
358
/*====================*/
359
mutex_t* mutex, /*!< in: pointer to mutex */
360
const char* file_name __attribute__((unused)),
361
/*!< in: file name where mutex
363
ulint line __attribute__((unused)))
364
/*!< in: line where requested */
366
ut_ad(mutex_validate(mutex));
368
if (!mutex_test_and_set(mutex)) {
370
ut_d(mutex->thread_id = os_thread_get_curr_id());
371
#ifdef UNIV_SYNC_DEBUG
372
mutex_set_debug_info(mutex, file_name, line);
375
return(0); /* Succeeded! */
382
/******************************************************************//**
383
Checks that the mutex has been initialized.
389
const mutex_t* mutex) /*!< in: mutex */
392
ut_a(mutex->magic_n == MUTEX_MAGIC_N);
397
/******************************************************************//**
398
Checks that the current thread owns the mutex. Works only in the debug
400
@return TRUE if owns */
405
const mutex_t* mutex) /*!< in: mutex */
407
ut_ad(mutex_validate(mutex));
409
return(mutex_get_lock_word(mutex) == 1
410
&& os_thread_eq(mutex->thread_id, os_thread_get_curr_id()));
412
#endif /* UNIV_DEBUG */
414
/******************************************************************//**
415
Sets the waiters field in a mutex. */
420
mutex_t* mutex, /*!< in: mutex */
421
ulint n) /*!< in: value to set */
423
volatile ulint* ptr; /* declared volatile to ensure that
424
the value is stored to memory */
427
ptr = &(mutex->waiters);
429
*ptr = n; /* Here we assume that the write of a single
430
word in memory is atomic */
433
/******************************************************************//**
434
Reserves a mutex for the current thread. If the mutex is reserved, the
435
function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
436
for the mutex before suspending the thread. */
441
mutex_t* mutex, /*!< in: pointer to mutex */
442
const char* file_name, /*!< in: file name where mutex
444
ulint line) /*!< in: line where requested */
446
ulint index; /* index of the reserved wait cell */
447
ulint i; /* spin round count */
449
ib_int64_t lstart_time = 0, lfinish_time; /* for timing os_wait */
453
uint timer_started = 0;
454
#endif /* UNIV_DEBUG */
457
/* This update is not thread safe, but we don't mind if the count
458
isn't exact. Moved out of ifdef that follows because we are willing
459
to sacrifice the cost of counting this as the data is valuable.
460
Count the number of calls to mutex_spin_wait. */
461
mutex_spin_wait_count++;
467
/* Spin waiting for the lock word to become zero. Note that we do
468
not have to assume that the read access to the lock word is atomic,
469
as the actual locking is always committed with atomic test-and-set.
470
In reality, however, all processors probably have an atomic read of
474
ut_d(mutex->count_spin_loop++);
476
while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
477
if (srv_spin_wait_delay) {
478
ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
484
if (i == SYNC_SPIN_ROUNDS) {
486
mutex->count_os_yield++;
487
#ifndef UNIV_HOTBACKUP
488
if (timed_mutexes && timer_started == 0) {
489
ut_usectime(&sec, &ms);
490
lstart_time= (ib_int64_t)sec * 1000000 + ms;
493
#endif /* UNIV_HOTBACKUP */
494
#endif /* UNIV_DEBUG */
498
#ifdef UNIV_SRV_PRINT_LATCH_WAITS
500
"Thread %lu spin wait mutex at %p"
501
" cfile %s cline %lu rnds %lu\n",
502
(ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
503
mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
506
mutex_spin_round_count += i;
508
ut_d(mutex->count_spin_rounds += i);
510
if (mutex_test_and_set(mutex) == 0) {
513
ut_d(mutex->thread_id = os_thread_get_curr_id());
514
#ifdef UNIV_SYNC_DEBUG
515
mutex_set_debug_info(mutex, file_name, line);
521
/* We may end up with a situation where lock_word is 0 but the OS
522
fast mutex is still reserved. On FreeBSD the OS does not seem to
523
schedule a thread which is constantly calling pthread_mutex_trylock
524
(in mutex_test_and_set implementation). Then we could end up
525
spinning here indefinitely. The following 'i++' stops this infinite
530
if (i < SYNC_SPIN_ROUNDS) {
534
sync_array_reserve_cell(sync_primary_wait_array, mutex,
535
SYNC_MUTEX, file_name, line, &index);
537
/* The memory order of the array reservation and the change in the
538
waiters field is important: when we suspend a thread, we first
539
reserve the cell and then set waiters field to 1. When threads are
540
released in mutex_exit, the waiters field is first set to zero and
541
then the event is set to the signaled state. */
543
mutex_set_waiters(mutex, 1);
545
/* Try to reserve still a few times */
546
for (i = 0; i < 4; i++) {
547
if (mutex_test_and_set(mutex) == 0) {
548
/* Succeeded! Free the reserved wait cell */
550
sync_array_free_cell(sync_primary_wait_array, index);
552
ut_d(mutex->thread_id = os_thread_get_curr_id());
553
#ifdef UNIV_SYNC_DEBUG
554
mutex_set_debug_info(mutex, file_name, line);
557
#ifdef UNIV_SRV_PRINT_LATCH_WAITS
558
fprintf(stderr, "Thread %lu spin wait succeeds at 2:"
560
(ulong) os_thread_pf(os_thread_get_curr_id()),
566
/* Note that in this case we leave the waiters field
567
set to 1. We cannot reset it to zero, as we do not
568
know if there are other waiters. */
572
/* Now we know that there has been some thread holding the mutex
573
after the change in the wait array and the waiters field was made.
574
Now there is no risk of infinite wait on the event. */
576
#ifdef UNIV_SRV_PRINT_LATCH_WAITS
578
"Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n",
579
(ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
580
mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
583
mutex_os_wait_count++;
585
mutex->count_os_wait++;
587
/* !!!!! Sometimes os_wait can be called without os_thread_yield */
588
#ifndef UNIV_HOTBACKUP
589
if (timed_mutexes == 1 && timer_started == 0) {
590
ut_usectime(&sec, &ms);
591
lstart_time= (ib_int64_t)sec * 1000000 + ms;
594
#endif /* UNIV_HOTBACKUP */
595
#endif /* UNIV_DEBUG */
597
sync_array_wait_event(sync_primary_wait_array, index);
602
if (timed_mutexes == 1 && timer_started==1) {
603
ut_usectime(&sec, &ms);
604
lfinish_time= (ib_int64_t)sec * 1000000 + ms;
606
ltime_diff= (ulint) (lfinish_time - lstart_time);
607
mutex->lspent_time += ltime_diff;
609
if (mutex->lmax_spent_time < ltime_diff) {
610
mutex->lmax_spent_time= ltime_diff;
613
#endif /* UNIV_DEBUG */
617
/******************************************************************//**
618
Releases the threads waiting in the primary wait array for this mutex. */
623
mutex_t* mutex) /*!< in: mutex */
625
mutex_set_waiters(mutex, 0);
627
/* The memory order of resetting the waiters field and
628
signaling the object is important. See LEMMA 1 above. */
629
os_event_set(mutex->event);
630
sync_array_object_signalled(sync_primary_wait_array);
633
#ifdef UNIV_SYNC_DEBUG
634
/******************************************************************//**
635
Sets the debug information for a reserved mutex. */
638
mutex_set_debug_info(
639
/*=================*/
640
mutex_t* mutex, /*!< in: mutex */
641
const char* file_name, /*!< in: file where requested */
642
ulint line) /*!< in: line where requested */
647
sync_thread_add_level(mutex, mutex->level);
649
mutex->file_name = file_name;
653
/******************************************************************//**
654
Gets the debug information for a reserved mutex. */
657
mutex_get_debug_info(
658
/*=================*/
659
mutex_t* mutex, /*!< in: mutex */
660
const char** file_name, /*!< out: file where requested */
661
ulint* line, /*!< out: line where requested */
662
os_thread_id_t* thread_id) /*!< out: id of the thread which owns
667
*file_name = mutex->file_name;
669
*thread_id = mutex->thread_id;
672
/******************************************************************//**
673
Prints debug info of currently reserved mutexes. */
676
mutex_list_print_info(
677
/*==================*/
678
FILE* file) /*!< in: file where to print */
681
const char* file_name;
683
os_thread_id_t thread_id;
688
"----------\n", file);
690
mutex_enter(&mutex_list_mutex);
692
mutex = UT_LIST_GET_FIRST(mutex_list);
694
while (mutex != NULL) {
697
if (mutex_get_lock_word(mutex) != 0) {
698
mutex_get_debug_info(mutex, &file_name, &line,
701
"Locked mutex: addr %p thread %ld"
702
" file %s line %ld\n",
703
(void*) mutex, os_thread_pf(thread_id),
707
mutex = UT_LIST_GET_NEXT(list, mutex);
710
fprintf(file, "Total number of mutexes %ld\n", count);
712
mutex_exit(&mutex_list_mutex);
715
/******************************************************************//**
716
Counts currently reserved mutexes. Works only in the debug version.
717
@return number of reserved mutexes */
720
mutex_n_reserved(void)
721
/*==================*/
726
mutex_enter(&mutex_list_mutex);
728
mutex = UT_LIST_GET_FIRST(mutex_list);
730
while (mutex != NULL) {
731
if (mutex_get_lock_word(mutex) != 0) {
736
mutex = UT_LIST_GET_NEXT(list, mutex);
739
mutex_exit(&mutex_list_mutex);
743
return(count - 1); /* Subtract one, because this function itself
744
was holding one mutex (mutex_list_mutex) */
747
/******************************************************************//**
748
Returns TRUE if no mutex or rw-lock is currently locked. Works only in
750
@return TRUE if no mutexes and rw-locks reserved */
756
return(mutex_n_reserved() + rw_lock_n_locked() == 0);
759
/******************************************************************//**
760
Gets the value in the nth slot in the thread level arrays.
761
@return pointer to thread slot */
764
sync_thread_level_arrays_get_nth(
765
/*=============================*/
766
ulint n) /*!< in: slot number */
768
ut_ad(n < OS_THREAD_MAX_N);
770
return(sync_thread_level_arrays + n);
773
/******************************************************************//**
774
Looks for the thread slot for the calling thread.
775
@return pointer to thread slot, NULL if not found */
778
sync_thread_level_arrays_find_slot(void)
779
/*====================================*/
786
id = os_thread_get_curr_id();
788
for (i = 0; i < OS_THREAD_MAX_N; i++) {
790
slot = sync_thread_level_arrays_get_nth(i);
792
if (slot->levels && os_thread_eq(slot->id, id)) {
801
/******************************************************************//**
802
Looks for an unused thread slot.
803
@return pointer to thread slot */
806
sync_thread_level_arrays_find_free(void)
807
/*====================================*/
813
for (i = 0; i < OS_THREAD_MAX_N; i++) {
815
slot = sync_thread_level_arrays_get_nth(i);
817
if (slot->levels == NULL) {
826
/******************************************************************//**
827
Gets the value in the nth slot in the thread level array.
828
@return pointer to level slot */
831
sync_thread_levels_get_nth(
832
/*=======================*/
833
sync_level_t* arr, /*!< in: pointer to level array for an OS
835
ulint n) /*!< in: slot number */
837
ut_ad(n < SYNC_THREAD_N_LEVELS);
842
/******************************************************************//**
843
Checks if all the level values stored in the level array are greater than
845
@return TRUE if all greater */
848
sync_thread_levels_g(
849
/*=================*/
850
sync_level_t* arr, /*!< in: pointer to level array for an OS
852
ulint limit) /*!< in: level limit */
859
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
861
slot = sync_thread_levels_get_nth(arr, i);
863
if (slot->latch != NULL) {
864
if (slot->level <= limit) {
870
"InnoDB: sync levels should be"
871
" > %lu but a level is %lu\n",
872
(ulong) limit, (ulong) slot->level);
874
if (mutex->magic_n == MUTEX_MAGIC_N) {
876
"Mutex created at %s %lu\n",
878
(ulong) mutex->cline);
880
if (mutex_get_lock_word(mutex) != 0) {
881
const char* file_name;
883
os_thread_id_t thread_id;
885
mutex_get_debug_info(
890
"InnoDB: Locked mutex:"
891
" addr %p thread %ld"
892
" file %s line %ld\n",
899
fputs("Not locked\n", stderr);
913
/******************************************************************//**
914
Checks if the level value is stored in the level array.
915
@return TRUE if stored */
918
sync_thread_levels_contain(
919
/*=======================*/
920
sync_level_t* arr, /*!< in: pointer to level array for an OS
922
ulint level) /*!< in: level */
927
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
929
slot = sync_thread_levels_get_nth(arr, i);
931
if (slot->latch != NULL) {
932
if (slot->level == level) {
942
/******************************************************************//**
943
Checks that the level array for the current thread is empty.
944
@return TRUE if empty except the exceptions specified below */
947
sync_thread_levels_empty_gen(
948
/*=========================*/
949
ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is
950
allowed to be owned by the thread,
951
also purge_is_running mutex is
955
sync_thread_t* thread_slot;
959
if (!sync_order_checks_on) {
964
mutex_enter(&sync_thread_mutex);
966
thread_slot = sync_thread_level_arrays_find_slot();
968
if (thread_slot == NULL) {
970
mutex_exit(&sync_thread_mutex);
975
arr = thread_slot->levels;
977
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
979
slot = sync_thread_levels_get_nth(arr, i);
981
if (slot->latch != NULL
982
&& (!dict_mutex_allowed
983
|| (slot->level != SYNC_DICT
984
&& slot->level != SYNC_DICT_OPERATION))) {
986
mutex_exit(&sync_thread_mutex);
993
mutex_exit(&sync_thread_mutex);
998
/******************************************************************//**
999
Checks that the level array for the current thread is empty.
1000
@return TRUE if empty */
1003
sync_thread_levels_empty(void)
1004
/*==========================*/
1006
return(sync_thread_levels_empty_gen(FALSE));
1009
/******************************************************************//**
1010
Adds a latch and its level in the thread level array. Allocates the memory
1011
for the array if called first time for this OS thread. Makes the checks
1012
against other latch levels stored in the array for this thread. */
1015
sync_thread_add_level(
1016
/*==================*/
1017
void* latch, /*!< in: pointer to a mutex or an rw-lock */
1018
ulint level) /*!< in: level in the latching order; if
1019
SYNC_LEVEL_VARYING, nothing is done */
1021
sync_level_t* array;
1023
sync_thread_t* thread_slot;
1026
if (!sync_order_checks_on) {
1031
if ((latch == (void*)&sync_thread_mutex)
1032
|| (latch == (void*)&mutex_list_mutex)
1033
|| (latch == (void*)&rw_lock_debug_mutex)
1034
|| (latch == (void*)&rw_lock_list_mutex)) {
1039
if (level == SYNC_LEVEL_VARYING) {
1044
mutex_enter(&sync_thread_mutex);
1046
thread_slot = sync_thread_level_arrays_find_slot();
1048
if (thread_slot == NULL) {
1049
/* We have to allocate the level array for a new thread */
1050
array = ut_malloc(sizeof(sync_level_t) * SYNC_THREAD_N_LEVELS);
1052
thread_slot = sync_thread_level_arrays_find_free();
1054
thread_slot->id = os_thread_get_curr_id();
1055
thread_slot->levels = array;
1057
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
1059
slot = sync_thread_levels_get_nth(array, i);
1065
array = thread_slot->levels;
1067
/* NOTE that there is a problem with _NODE and _LEAF levels: if the
1068
B-tree height changes, then a leaf can change to an internal node
1069
or the other way around. We do not know at present if this can cause
1070
unnecessary assertion failures below. */
1073
case SYNC_NO_ORDER_CHECK:
1074
case SYNC_EXTERN_STORAGE:
1075
case SYNC_TREE_NODE_FROM_HASH:
1076
/* Do no order checking */
1081
case SYNC_WORK_QUEUE:
1083
case SYNC_THR_LOCAL:
1084
case SYNC_ANY_LATCH:
1085
case SYNC_TRX_SYS_HEADER:
1086
case SYNC_FILE_FORMAT_TAG:
1087
case SYNC_DOUBLEWRITE:
1089
case SYNC_SEARCH_SYS:
1090
case SYNC_SEARCH_SYS_CONF:
1091
case SYNC_TRX_LOCK_HEAP:
1093
case SYNC_IBUF_BITMAP_MUTEX:
1096
case SYNC_PURGE_LATCH:
1097
case SYNC_PURGE_SYS:
1098
case SYNC_DICT_AUTOINC_MUTEX:
1099
case SYNC_DICT_OPERATION:
1100
case SYNC_DICT_HEADER:
1101
case SYNC_TRX_I_S_RWLOCK:
1102
case SYNC_TRX_I_S_LAST_READ:
1103
if (!sync_thread_levels_g(array, level)) {
1105
"InnoDB: sync_thread_levels_g(array, %lu)"
1106
" does not hold!\n", level);
1110
case SYNC_BUF_BLOCK:
1111
/* Either the thread must own the buffer pool mutex
1112
(buf_pool_mutex), or it is allowed to latch only ONE
1113
buffer block (block->mutex or buf_pool_zip_mutex). */
1114
if (!sync_thread_levels_g(array, level)) {
1115
ut_a(sync_thread_levels_g(array, level - 1));
1116
ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
1120
ut_a((sync_thread_levels_contain(array, SYNC_KERNEL)
1121
&& sync_thread_levels_g(array, SYNC_REC_LOCK - 1))
1122
|| sync_thread_levels_g(array, SYNC_REC_LOCK));
1124
case SYNC_IBUF_BITMAP:
1125
/* Either the thread must own the master mutex to all
1126
the bitmap pages, or it is allowed to latch only ONE
1128
ut_a((sync_thread_levels_contain(array, SYNC_IBUF_BITMAP_MUTEX)
1129
&& sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1))
1130
|| sync_thread_levels_g(array, SYNC_IBUF_BITMAP));
1133
ut_a(sync_thread_levels_contain(array, SYNC_FSP));
1136
ut_a(sync_thread_levels_contain(array, SYNC_FSP)
1137
|| sync_thread_levels_g(array, SYNC_FSP));
1139
case SYNC_TRX_UNDO_PAGE:
1140
ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
1141
|| sync_thread_levels_contain(array, SYNC_RSEG)
1142
|| sync_thread_levels_contain(array, SYNC_PURGE_SYS)
1143
|| sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE));
1145
case SYNC_RSEG_HEADER:
1146
ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
1148
case SYNC_RSEG_HEADER_NEW:
1149
ut_a(sync_thread_levels_contain(array, SYNC_KERNEL)
1150
&& sync_thread_levels_contain(array, SYNC_FSP_PAGE));
1152
case SYNC_TREE_NODE:
1153
ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
1154
|| sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
1155
|| sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
1157
case SYNC_TREE_NODE_NEW:
1158
ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE)
1159
|| sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
1161
case SYNC_INDEX_TREE:
1162
ut_a((sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
1163
&& sync_thread_levels_contain(array, SYNC_FSP)
1164
&& sync_thread_levels_g(array, SYNC_FSP_PAGE - 1))
1165
|| sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
1167
case SYNC_IBUF_MUTEX:
1168
ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1));
1170
case SYNC_IBUF_PESS_INSERT_MUTEX:
1171
ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
1172
&& !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
1174
case SYNC_IBUF_HEADER:
1175
ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
1176
&& !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
1177
&& !sync_thread_levels_contain(
1178
array, SYNC_IBUF_PESS_INSERT_MUTEX));
1182
ut_a(buf_debug_prints
1183
|| sync_thread_levels_g(array, SYNC_DICT));
1184
#else /* UNIV_DEBUG */
1185
ut_a(sync_thread_levels_g(array, SYNC_DICT));
1186
#endif /* UNIV_DEBUG */
1192
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
1194
slot = sync_thread_levels_get_nth(array, i);
1196
if (slot->latch == NULL) {
1197
slot->latch = latch;
1198
slot->level = level;
1204
ut_a(i < SYNC_THREAD_N_LEVELS);
1206
mutex_exit(&sync_thread_mutex);
1209
/******************************************************************//**
1210
Removes a latch from the thread level array if it is found there.
1211
@return TRUE if found in the array; it is no error if the latch is
1212
not found, as we presently are not able to determine the level for
1213
every latch reservation the program does */
1216
sync_thread_reset_level(
1217
/*====================*/
1218
void* latch) /*!< in: pointer to a mutex or an rw-lock */
1220
sync_level_t* array;
1222
sync_thread_t* thread_slot;
1225
if (!sync_order_checks_on) {
1230
if ((latch == (void*)&sync_thread_mutex)
1231
|| (latch == (void*)&mutex_list_mutex)
1232
|| (latch == (void*)&rw_lock_debug_mutex)
1233
|| (latch == (void*)&rw_lock_list_mutex)) {
1238
mutex_enter(&sync_thread_mutex);
1240
thread_slot = sync_thread_level_arrays_find_slot();
1242
if (thread_slot == NULL) {
1246
mutex_exit(&sync_thread_mutex);
1250
array = thread_slot->levels;
1252
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
1254
slot = sync_thread_levels_get_nth(array, i);
1256
if (slot->latch == latch) {
1259
mutex_exit(&sync_thread_mutex);
1265
if (((mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
1268
rw_lock = (rw_lock_t*) latch;
1270
if (rw_lock->level == SYNC_LEVEL_VARYING) {
1271
mutex_exit(&sync_thread_mutex);
1279
mutex_exit(&sync_thread_mutex);
1283
#endif /* UNIV_SYNC_DEBUG */
1285
/******************************************************************//**
1286
Initializes the synchronization data structures. */
1292
#ifdef UNIV_SYNC_DEBUG
1293
sync_thread_t* thread_slot;
1295
#endif /* UNIV_SYNC_DEBUG */
1297
ut_a(sync_initialized == FALSE);
1299
sync_initialized = TRUE;
1301
/* Create the primary system wait array which is protected by an OS
1304
sync_primary_wait_array = sync_array_create(OS_THREAD_MAX_N,
1305
SYNC_ARRAY_OS_MUTEX);
1306
#ifdef UNIV_SYNC_DEBUG
1307
/* Create the thread latch level array where the latch levels
1308
are stored for each OS thread */
1310
sync_thread_level_arrays = ut_malloc(OS_THREAD_MAX_N
1311
* sizeof(sync_thread_t));
1312
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1314
thread_slot = sync_thread_level_arrays_get_nth(i);
1315
thread_slot->levels = NULL;
1317
#endif /* UNIV_SYNC_DEBUG */
1318
/* Init the mutex list and create the mutex to protect it. */
1320
UT_LIST_INIT(mutex_list);
1321
mutex_create(&mutex_list_mutex, SYNC_NO_ORDER_CHECK);
1322
#ifdef UNIV_SYNC_DEBUG
1323
mutex_create(&sync_thread_mutex, SYNC_NO_ORDER_CHECK);
1324
#endif /* UNIV_SYNC_DEBUG */
1326
/* Init the rw-lock list and create the mutex to protect it. */
1328
UT_LIST_INIT(rw_lock_list);
1329
mutex_create(&rw_lock_list_mutex, SYNC_NO_ORDER_CHECK);
1331
#ifdef UNIV_SYNC_DEBUG
1332
mutex_create(&rw_lock_debug_mutex, SYNC_NO_ORDER_CHECK);
1334
rw_lock_debug_event = os_event_create(NULL);
1335
rw_lock_debug_waiters = FALSE;
1336
#endif /* UNIV_SYNC_DEBUG */
1339
/******************************************************************//**
1340
Frees the resources in InnoDB's own synchronization data structures. Use
1341
os_sync_free() after calling this. */
1349
sync_array_free(sync_primary_wait_array);
1351
mutex = UT_LIST_GET_FIRST(mutex_list);
1355
mutex = UT_LIST_GET_FIRST(mutex_list);
1358
mutex_free(&mutex_list_mutex);
1359
#ifdef UNIV_SYNC_DEBUG
1360
mutex_free(&sync_thread_mutex);
1361
#endif /* UNIV_SYNC_DEBUG */
1364
/*******************************************************************//**
1365
Prints wait info of the sync system. */
1368
sync_print_wait_info(
1369
/*=================*/
1370
FILE* file) /*!< in: file where to print */
1372
#ifdef UNIV_SYNC_DEBUG
1373
fprintf(file, "Mutex exits %llu, rws exits %llu, rwx exits %llu\n",
1374
mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
1378
"Mutex spin waits %"PRId64", rounds %"PRId64", "
1379
"OS waits %"PRId64"\n"
1380
"RW-shared spins %"PRId64", OS waits %"PRId64";"
1381
" RW-excl spins %"PRId64", OS waits %"PRId64"\n",
1382
mutex_spin_wait_count,
1383
mutex_spin_round_count,
1384
mutex_os_wait_count,
1385
rw_s_spin_wait_count,
1387
rw_x_spin_wait_count,
1388
rw_x_os_wait_count);
1391
"Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
1393
(double) mutex_spin_round_count /
1394
(mutex_spin_wait_count ? mutex_spin_wait_count : 1),
1395
(double) rw_s_spin_round_count /
1396
(rw_s_spin_wait_count ? rw_s_spin_wait_count : 1),
1397
(double) rw_x_spin_round_count /
1398
(rw_x_spin_wait_count ? rw_x_spin_wait_count : 1));
1401
/*******************************************************************//**
1402
Prints info of the sync system. */
1407
FILE* file) /*!< in: file where to print */
1409
#ifdef UNIV_SYNC_DEBUG
1410
mutex_list_print_info(file);
1412
rw_lock_list_print_info(file);
1413
#endif /* UNIV_SYNC_DEBUG */
1415
sync_array_print_info(file, sync_primary_wait_array);
1417
sync_print_wait_info(file);