/*****************************************************************************

Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA

*****************************************************************************/

/**************************************************//**
The interface to the operating system
synchronization primitives.

Created 9/6/1995 Heikki Tuuri
*******************************************************/
#include "os0sync.h"
#ifdef UNIV_NONINL
#include "os0sync.ic"
#endif

#ifdef __WIN__
#include <windows.h>
#endif

#include "ut0mem.h"
#include "srv0start.h"
#include "srv0srv.h"
/* Type definition for an operating system mutex struct */
41
struct os_mutex_struct{
42
os_event_t event; /*!< Used by sync0arr.c for queing threads */
43
void* handle; /*!< OS handle to mutex */
44
ulint count; /*!< we use this counter to check
45
that the same thread does not
46
recursively lock the mutex: we
47
do not assume that the OS mutex
48
supports recursive locking, though
49
NT seems to do that */
50
UT_LIST_NODE_T(os_mutex_str_t) os_mutex_list;
51
/* list of all 'slow' OS mutexes created */
54
/** Mutex protecting counts and the lists of OS mutexes and events */
55
UNIV_INTERN os_mutex_t os_sync_mutex;
56
/** TRUE if os_sync_mutex has been initialized */
57
static ibool os_sync_mutex_inited = FALSE;
58
/** TRUE when os_sync_free() is being executed */
59
static ibool os_sync_free_called = FALSE;
61
/** This is incremented by 1 in os_thread_create and decremented by 1 in
63
UNIV_INTERN ulint os_thread_count = 0;
65
/** The list of all events created */
66
static UT_LIST_BASE_NODE_T(os_event_struct_t) os_event_list;
68
/** The list of all OS 'slow' mutexes */
69
static UT_LIST_BASE_NODE_T(os_mutex_str_t) os_mutex_list;
71
UNIV_INTERN ulint os_event_count = 0;
72
UNIV_INTERN ulint os_mutex_count = 0;
73
UNIV_INTERN ulint os_fast_mutex_count = 0;
75
/* The number of microsecnds in a second. */
76
static const ulint MICROSECS_IN_A_SECOND = 1000000;
78
/* Because a mutex is embedded inside an event and there is an
79
event embedded inside a mutex, on free, this generates a recursive call.
80
This version of the free event function doesn't acquire the global lock */
81
static void os_event_free_internal(os_event_t event);
83
/* On Windows (Vista and later), load function pointers for condition
84
variable handling. Those functions are not available in prior versions,
85
so we have to use them via runtime loading, as long as we support XP. */
86
static void os_cond_module_init(void);
89
/* Prototypes and function pointers for condition variable functions */
90
typedef VOID (WINAPI* InitializeConditionVariableProc)
91
(PCONDITION_VARIABLE ConditionVariable);
92
static InitializeConditionVariableProc initialize_condition_variable;
94
typedef BOOL (WINAPI* SleepConditionVariableCSProc)
95
(PCONDITION_VARIABLE ConditionVariable,
96
PCRITICAL_SECTION CriticalSection,
97
DWORD dwMilliseconds);
98
static SleepConditionVariableCSProc sleep_condition_variable;
100
typedef VOID (WINAPI* WakeAllConditionVariableProc)
101
(PCONDITION_VARIABLE ConditionVariable);
102
static WakeAllConditionVariableProc wake_all_condition_variable;
104
typedef VOID (WINAPI* WakeConditionVariableProc)
105
(PCONDITION_VARIABLE ConditionVariable);
106
static WakeConditionVariableProc wake_condition_variable;
109
/*********************************************************//**
110
Initialitze condition variable */
115
os_cond_t* cond) /*!< in: condition variable. */
120
ut_a(initialize_condition_variable != NULL);
121
initialize_condition_variable(cond);
123
ut_a(pthread_cond_init(cond, NULL) == 0);
127
/*********************************************************//**
128
Do a timed wait on condition variable.
129
@return TRUE if timed out, FALSE otherwise */
134
os_cond_t* cond, /*!< in: condition variable. */
135
os_fast_mutex_t* mutex, /*!< in: fast mutex */
137
const struct timespec* abstime /*!< in: timeout */
139
DWORD time_in_ms /*!< in: timeout in
141
#endif /* !__WIN__ */
148
ut_a(sleep_condition_variable != NULL);
150
ret = sleep_condition_variable(cond, mutex, time_in_ms);
153
err = GetLastError();
154
/* From http://msdn.microsoft.com/en-us/library/ms686301%28VS.85%29.aspx,
155
"Condition variables are subject to spurious wakeups
156
(those not associated with an explicit wake) and stolen wakeups
157
(another thread manages to run before the woken thread)."
158
Check for both types of timeouts.
159
Conditions are checked by the caller.*/
160
if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) {
171
ret = pthread_cond_timedwait(cond, mutex, abstime);
176
/* We play it safe by checking for EINTR even though
177
according to the POSIX documentation it can't return EINTR. */
182
fprintf(stderr, " InnoDB: pthread_cond_timedwait() returned: "
183
"%d: abstime={%lu,%lu}\n",
184
ret, (ulong) abstime->tv_sec, (ulong) abstime->tv_nsec);
188
return(ret == ETIMEDOUT);
191
/*********************************************************//**
192
Wait on condition variable */
197
os_cond_t* cond, /*!< in: condition variable. */
198
os_fast_mutex_t* mutex) /*!< in: fast mutex */
204
ut_a(sleep_condition_variable != NULL);
205
ut_a(sleep_condition_variable(cond, mutex, INFINITE));
207
ut_a(pthread_cond_wait(cond, mutex) == 0);
211
/*********************************************************//**
212
Wakes all threads waiting for condition variable */
217
os_cond_t* cond) /*!< in: condition variable. */
222
ut_a(wake_all_condition_variable != NULL);
223
wake_all_condition_variable(cond);
225
ut_a(pthread_cond_broadcast(cond) == 0);
229
/*********************************************************//**
230
Wakes one thread waiting for condition variable */
235
os_cond_t* cond) /*!< in: condition variable. */
240
ut_a(wake_condition_variable != NULL);
241
wake_condition_variable(cond);
243
ut_a(pthread_cond_signal(cond) == 0);
247
/*********************************************************//**
248
Destroys condition variable */
253
os_cond_t* cond) /*!< in: condition variable. */
258
ut_a(pthread_cond_destroy(cond) == 0);
/*********************************************************//**
On Windows (Vista and later), load function pointers for condition variable
handling. Those functions are not available in prior versions, so we have to
use them via runtime loading, as long as we support XP. */
static
void
os_cond_module_init(void)
/*=====================*/
{
#ifdef __WIN__
	HMODULE		h_dll;

	if (!srv_use_native_conditions)
		return;

	h_dll = GetModuleHandle("kernel32");

	initialize_condition_variable = (InitializeConditionVariableProc)
		GetProcAddress(h_dll, "InitializeConditionVariable");
	sleep_condition_variable = (SleepConditionVariableCSProc)
		GetProcAddress(h_dll, "SleepConditionVariableCS");
	wake_all_condition_variable = (WakeAllConditionVariableProc)
		GetProcAddress(h_dll, "WakeAllConditionVariable");
	wake_condition_variable = (WakeConditionVariableProc)
		GetProcAddress(h_dll, "WakeConditionVariable");

	/* When using native condition variables, check function pointers */
	ut_a(initialize_condition_variable);
	ut_a(sleep_condition_variable);
	ut_a(wake_all_condition_variable);
	ut_a(wake_condition_variable);
#endif /* __WIN__ */
}
296
/*********************************************************//**
297
Initializes global event and OS 'slow' mutex lists. */
303
UT_LIST_INIT(os_event_list);
304
UT_LIST_INIT(os_mutex_list);
306
os_sync_mutex = NULL;
307
os_sync_mutex_inited = FALSE;
309
/* Now for Windows only */
310
os_cond_module_init();
312
os_sync_mutex = os_mutex_create();
314
os_sync_mutex_inited = TRUE;
317
/*********************************************************//**
318
Frees created events and OS 'slow' mutexes. */
327
os_sync_free_called = TRUE;
328
event = UT_LIST_GET_FIRST(os_event_list);
332
os_event_free(event);
334
event = UT_LIST_GET_FIRST(os_event_list);
337
mutex = UT_LIST_GET_FIRST(os_mutex_list);
340
if (mutex == os_sync_mutex) {
341
/* Set the flag to FALSE so that we do not try to
342
reserve os_sync_mutex any more in remaining freeing
343
operations in shutdown */
344
os_sync_mutex_inited = FALSE;
347
os_mutex_free(mutex);
349
mutex = UT_LIST_GET_FIRST(os_mutex_list);
351
os_sync_free_called = FALSE;
354
/*********************************************************//**
355
Creates an event semaphore, i.e., a semaphore which may just have two
356
states: signaled and nonsignaled. The created event is manual reset: it
357
must be reset explicitly by calling sync_os_reset_event.
358
@return the event handle */
363
const char* name) /*!< in: the name of the event, if NULL
364
the event is created without a name */
369
if(!srv_use_native_conditions) {
371
event = ut_malloc(sizeof(struct os_event_struct));
373
event->handle = CreateEvent(NULL,
377
if (!event->handle) {
379
"InnoDB: Could not create a Windows event"
380
" semaphore; Windows error %lu\n",
381
(ulong) GetLastError());
383
} else /* Windows with condition variables */
389
event = ut_malloc(sizeof(struct os_event_struct));
391
os_fast_mutex_init(&(event->os_mutex));
393
os_cond_init(&(event->cond_var));
395
event->is_set = FALSE;
397
/* We return this value in os_event_reset(), which can then be
398
be used to pass to the os_event_wait_low(). The value of zero
399
is reserved in os_event_wait_low() for the case when the
400
caller does not want to pass any signal_count value. To
401
distinguish between the two cases we initialize signal_count
403
event->signal_count = 1;
406
/* The os_sync_mutex can be NULL because during startup an event
407
can be created [ because it's embedded in the mutex/rwlock ] before
408
this module has been initialized */
409
if (os_sync_mutex != NULL) {
410
os_mutex_enter(os_sync_mutex);
413
/* Put to the list of events */
414
UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
418
if (os_sync_mutex != NULL) {
419
os_mutex_exit(os_sync_mutex);
425
/**********************************************************//**
426
Sets an event semaphore to the signaled state: lets waiting threads
432
os_event_t event) /*!< in: event to set */
437
if (!srv_use_native_conditions) {
438
ut_a(SetEvent(event->handle));
445
os_fast_mutex_lock(&(event->os_mutex));
450
event->is_set = TRUE;
451
event->signal_count += 1;
452
os_cond_broadcast(&(event->cond_var));
455
os_fast_mutex_unlock(&(event->os_mutex));
458
/**********************************************************//**
459
Resets an event semaphore to the nonsignaled state. Waiting threads will
460
stop to wait for the event.
461
The return value should be passed to os_even_wait_low() if it is desired
462
that this thread should not wait in case of an intervening call to
463
os_event_set() between this os_event_reset() and the
464
os_event_wait_low() call. See comments for os_event_wait_low().
465
@return current signal_count. */
470
os_event_t event) /*!< in: event to reset */
477
if(!srv_use_native_conditions) {
478
ut_a(ResetEvent(event->handle));
483
os_fast_mutex_lock(&(event->os_mutex));
485
if (!event->is_set) {
488
event->is_set = FALSE;
490
ret = event->signal_count;
492
os_fast_mutex_unlock(&(event->os_mutex));
496
/**********************************************************//**
497
Frees an event object, without acquiring the global lock. */
500
os_event_free_internal(
501
/*===================*/
502
os_event_t event) /*!< in: event to free */
505
if(!srv_use_native_conditions) {
507
ut_a(CloseHandle(event->handle));
513
/* This is to avoid freeing the mutex twice */
514
os_fast_mutex_free(&(event->os_mutex));
516
os_cond_destroy(&(event->cond_var));
519
/* Remove from the list of events */
520
UT_LIST_REMOVE(os_event_list, os_event_list, event);
527
/**********************************************************//**
528
Frees an event object. */
533
os_event_t event) /*!< in: event to free */
538
if(!srv_use_native_conditions){
539
ut_a(CloseHandle(event->handle));
540
} else /*Windows with condition variables */
543
os_fast_mutex_free(&(event->os_mutex));
545
os_cond_destroy(&(event->cond_var));
548
/* Remove from the list of events */
549
os_mutex_enter(os_sync_mutex);
551
UT_LIST_REMOVE(os_event_list, os_event_list, event);
555
os_mutex_exit(os_sync_mutex);
560
/**********************************************************//**
561
Waits for an event object until it is in the signaled state.
563
Typically, if the event has been signalled after the os_event_reset()
564
we'll return immediately because event->is_set == TRUE.
565
There are, however, situations (e.g.: sync_array code) where we may
566
lose this information. For example:
568
thread A calls os_event_reset()
569
thread B calls os_event_set() [event->is_set == TRUE]
570
thread C calls os_event_reset() [event->is_set == FALSE]
571
thread A calls os_event_wait() [infinite wait!]
572
thread C calls os_event_wait() [infinite wait!]
574
Where such a scenario is possible, to avoid infinite wait, the
575
value returned by os_event_reset() should be passed in as
581
os_event_t event, /*!< in: event to wait */
582
ib_int64_t reset_sig_count)/*!< in: zero or the value
583
returned by previous call of
587
if(!srv_use_native_conditions) {
592
UT_NOT_USED(reset_sig_count);
594
/* Specify an infinite wait */
595
err = WaitForSingleObject(event->handle, INFINITE);
597
ut_a(err == WAIT_OBJECT_0);
602
os_fast_mutex_lock(&event->os_mutex);
604
if (!reset_sig_count) {
605
reset_sig_count = event->signal_count;
608
while (!event->is_set && event->signal_count == reset_sig_count) {
609
os_cond_wait(&(event->cond_var), &(event->os_mutex));
611
/* Solaris manual said that spurious wakeups may occur: we
612
have to check if the event really has been signaled after
613
we came here to wait */
616
os_fast_mutex_unlock(&event->os_mutex);
619
/**********************************************************//**
620
Waits for an event object until it is in the signaled state or
621
a timeout is exceeded.
622
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
625
os_event_wait_time_low(
626
/*===================*/
627
os_event_t event, /*!< in: event to wait */
628
ulint time_in_usec, /*!< in: timeout in
630
OS_SYNC_INFINITE_TIME */
631
ib_int64_t reset_sig_count) /*!< in: zero or the value
632
returned by previous call of
636
ibool timed_out = FALSE;
641
if (!srv_use_native_conditions) {
646
if (time_in_usec != OS_SYNC_INFINITE_TIME) {
647
time_in_ms = time_in_usec / 1000;
648
err = WaitForSingleObject(event->handle, time_in_ms);
650
err = WaitForSingleObject(event->handle, INFINITE);
653
if (err == WAIT_OBJECT_0) {
655
} else if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) {
656
return(OS_SYNC_TIME_EXCEEDED);
660
/* Dummy value to eliminate compiler warning. */
663
ut_a(sleep_condition_variable != NULL);
665
if (time_in_usec != OS_SYNC_INFINITE_TIME) {
666
time_in_ms = time_in_usec / 1000;
668
time_in_ms = INFINITE;
672
struct timespec abstime;
674
if (time_in_usec != OS_SYNC_INFINITE_TIME) {
680
ret = ut_usectime(&sec, &usec);
686
tv.tv_usec += time_in_usec;
688
if ((ulint) tv.tv_usec >= MICROSECS_IN_A_SECOND) {
689
tv.tv_sec += time_in_usec / MICROSECS_IN_A_SECOND;
690
tv.tv_usec %= MICROSECS_IN_A_SECOND;
693
abstime.tv_sec = tv.tv_sec;
694
abstime.tv_nsec = tv.tv_usec * 1000;
696
abstime.tv_nsec = 999999999;
697
abstime.tv_sec = (time_t) ULINT_MAX;
700
ut_a(abstime.tv_nsec <= 999999999);
704
os_fast_mutex_lock(&event->os_mutex);
706
if (!reset_sig_count) {
707
reset_sig_count = event->signal_count;
711
if (event->is_set || event->signal_count != reset_sig_count) {
716
timed_out = os_cond_wait_timed(
717
&event->cond_var, &event->os_mutex,
722
#endif /* !__WIN__ */
725
} while (!timed_out);
727
os_fast_mutex_unlock(&event->os_mutex);
729
return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0);
732
/*********************************************************//**
733
Creates an operating system mutex semaphore. Because these are slow, the
734
mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
735
@return the mutex handle */
738
os_mutex_create(void)
739
/*=================*/
741
os_fast_mutex_t* mutex;
742
os_mutex_t mutex_str;
744
mutex = ut_malloc(sizeof(os_fast_mutex_t));
746
os_fast_mutex_init(mutex);
747
mutex_str = ut_malloc(sizeof(os_mutex_str_t));
749
mutex_str->handle = mutex;
750
mutex_str->count = 0;
751
mutex_str->event = os_event_create(NULL);
753
if (UNIV_LIKELY(os_sync_mutex_inited)) {
754
/* When creating os_sync_mutex itself we cannot reserve it */
755
os_mutex_enter(os_sync_mutex);
758
UT_LIST_ADD_FIRST(os_mutex_list, os_mutex_list, mutex_str);
762
if (UNIV_LIKELY(os_sync_mutex_inited)) {
763
os_mutex_exit(os_sync_mutex);
769
/**********************************************************//**
770
Acquires ownership of a mutex semaphore. */
775
os_mutex_t mutex) /*!< in: mutex to acquire */
777
os_fast_mutex_lock(mutex->handle);
781
ut_a(mutex->count == 1);
784
/**********************************************************//**
785
Releases ownership of a mutex. */
790
os_mutex_t mutex) /*!< in: mutex to release */
794
ut_a(mutex->count == 1);
797
os_fast_mutex_unlock(mutex->handle);
800
/**********************************************************//**
801
Frees a mutex object. */
806
os_mutex_t mutex) /*!< in: mutex to free */
810
if (UNIV_LIKELY(!os_sync_free_called)) {
811
os_event_free_internal(mutex->event);
814
if (UNIV_LIKELY(os_sync_mutex_inited)) {
815
os_mutex_enter(os_sync_mutex);
818
UT_LIST_REMOVE(os_mutex_list, os_mutex_list, mutex);
822
if (UNIV_LIKELY(os_sync_mutex_inited)) {
823
os_mutex_exit(os_sync_mutex);
826
os_fast_mutex_free(mutex->handle);
827
ut_free(mutex->handle);
831
/*********************************************************//**
832
Initializes an operating system fast mutex semaphore. */
837
os_fast_mutex_t* fast_mutex) /*!< in: fast mutex */
842
InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex);
844
ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST));
846
if (UNIV_LIKELY(os_sync_mutex_inited)) {
847
/* When creating os_sync_mutex itself (in Unix) we cannot
850
os_mutex_enter(os_sync_mutex);
853
os_fast_mutex_count++;
855
if (UNIV_LIKELY(os_sync_mutex_inited)) {
856
os_mutex_exit(os_sync_mutex);
860
/**********************************************************//**
861
Acquires ownership of a fast mutex. */
866
os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
869
EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex);
871
pthread_mutex_lock(fast_mutex);
875
/**********************************************************//**
876
Releases ownership of a fast mutex. */
879
os_fast_mutex_unlock(
880
/*=================*/
881
os_fast_mutex_t* fast_mutex) /*!< in: mutex to release */
884
LeaveCriticalSection(fast_mutex);
886
pthread_mutex_unlock(fast_mutex);
890
/**********************************************************//**
891
Frees a mutex object. */
896
os_fast_mutex_t* fast_mutex) /*!< in: mutex to free */
901
DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex);
905
ret = pthread_mutex_destroy(fast_mutex);
907
if (UNIV_UNLIKELY(ret != 0)) {
908
ut_print_timestamp(stderr);
910
" InnoDB: error: return value %lu when calling\n"
911
"InnoDB: pthread_mutex_destroy().\n", (ulint)ret);
913
"InnoDB: Byte contents of the pthread mutex at %p:\n",
915
ut_print_buf(stderr, fast_mutex, sizeof(os_fast_mutex_t));
919
if (UNIV_LIKELY(os_sync_mutex_inited)) {
920
/* When freeing the last mutexes, we have
921
already freed os_sync_mutex */
923
os_mutex_enter(os_sync_mutex);
926
ut_ad(os_fast_mutex_count > 0);
927
os_fast_mutex_count--;
929
if (UNIV_LIKELY(os_sync_mutex_inited)) {
930
os_mutex_exit(os_sync_mutex);