1
/*-------------------------------------------------------------------------
4
* Implement PGSemaphores using SysV semaphore facilities
7
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
8
* Portions Copyright (c) 1994, Regents of the University of California
11
* src/backend/port/sysv_sema.c
13
*-------------------------------------------------------------------------
26
#ifdef HAVE_KERNEL_OS_H
27
#include <kernel/OS.h>
30
#include "miscadmin.h"
31
#include "storage/ipc.h"
32
#include "storage/pg_sema.h"
35
#ifndef HAVE_UNION_SEMUN
40
unsigned short *array;
44
typedef key_t IpcSemaphoreKey; /* semaphore key passed to semget(2) */
45
typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */
48
* SEMAS_PER_SET is the number of useful semaphores in each semaphore set
49
* we allocate. It must be *less than* your kernel's SEMMSL (max semaphores
50
* per set) parameter, which is often around 25. (Less than, because we
51
* allocate one extra sema in each set for identification purposes.)
53
#define SEMAS_PER_SET 16
55
/*
 * Permission bits combined with IPC_CREAT | IPC_EXCL in the semget() call
 * that creates a new set.
 */
#define IPCProtection (0600) /* access/modify by user only */
57
/*
 * Value looked for in the extra (identification) semaphore of a set;
 * IpcSemaphoreCreate compares against it to decide whether an existing set
 * was made by this code.
 */
#define PGSemaMagic 537 /* must be less than SEMVMX */
60
/*
 * File-local bookkeeping shared by the functions below.  PGReserveSemaphores
 * allocates the array and seeds the key and slot counters.
 */
static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so far */
61
static int numSemaSets; /* number of sema sets acquired so far */
62
static int maxSemaSets; /* allocated size of mySemaSets array */
63
static IpcSemaphoreKey nextSemaKey; /* next key to try using */
64
static int nextSemaNumber; /* next free sem num in last sema set */
67
static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
69
static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum,
71
static void IpcSemaphoreKill(IpcSemaphoreId semId);
72
static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum);
73
static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum);
74
static IpcSemaphoreId IpcSemaphoreCreate(int numSems);
75
static void ReleaseSemaphores(int status, Datum arg);
79
* InternalIpcSemaphoreCreate
81
* Attempt to create a new semaphore set with the specified key.
82
* Will fail (return -1) if such a set already exists.
84
* If we fail with a failure code other than collision-with-existing-set,
85
* print out an error and abort. Other types of errors suggest nonrecoverable
89
InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems)
93
/*
 * Ask for a brand-new set of numSems semaphores under semKey.  IPC_EXCL
 * makes the call fail instead of opening a set that already exists.
 */
semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);
98
* Fail quietly if error indicates a collision with existing set. One
99
* would expect EEXIST, given that we said IPC_EXCL, but perhaps we
100
* could get a permission violation instead? Also, EIDRM might occur
101
* if an old set is slated for destruction but not gone yet.
103
if (errno == EEXIST || errno == EACCES
111
* Else complain and abort
114
/*
 * The detail message repeats the exact key, count and flag word that were
 * handed to semget().
 */
(errmsg("could not create semaphores: %m"),
115
errdetail("Failed system call was semget(%lu, %d, 0%o).",
116
(unsigned long) semKey, numSems,
117
IPC_CREAT | IPC_EXCL | IPCProtection),
119
errhint("This error does *not* mean that you have run out of disk space.\n"
120
"It occurs when either the system limit for the maximum number of "
121
"semaphore sets (SEMMNI), or the system wide maximum number of "
122
"semaphores (SEMMNS), would be exceeded. You need to raise the "
123
"respective kernel parameter. Alternatively, reduce PostgreSQL's "
124
"consumption of semaphores by reducing its max_connections parameter "
126
"The PostgreSQL documentation contains more information about "
127
"configuring your system for PostgreSQL.",
135
* Initialize a semaphore to the specified value.
138
IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
143
/*
 * SETVAL on semaphore semNum of set semId.  A failure is reported together
 * with the set ID, semaphore number and requested value.
 *
 * NOTE(review): the statement that loads "value" into semun is not shown
 * here -- confirm it precedes this call.
 */
if (semctl(semId, semNum, SETVAL, semun) < 0)
145
(errmsg_internal("semctl(%d, %d, SETVAL, %d) failed: %m",
146
semId, semNum, value),
148
errhint("You possibly need to raise your kernel's SEMVMX value to be at least "
149
"%d. Look into the PostgreSQL documentation for details.",
154
* IpcSemaphoreKill(semId) - removes a semaphore set
157
IpcSemaphoreKill(IpcSemaphoreId semId)
161
semun.val = 0; /* unused, but keep compiler quiet */
163
/*
 * IPC_RMID is issued against the set as a whole (semaphore number 0 is
 * passed).  A failure is only written to the log at LOG level; it is not
 * raised as an error.
 */
if (semctl(semId, 0, IPC_RMID, semun) < 0)
164
elog(LOG, "semctl(%d, 0, IPC_RMID, ...) failed: %m", semId);
167
/* Get the current value (semval) of the semaphore */
169
IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
171
union semun dummy; /* for Solaris */
173
dummy.val = 0; /* unused */
175
/*
 * The result of the GETVAL request is handed back to the caller unchanged;
 * no error check is made here.
 */
return semctl(semId, semNum, GETVAL, dummy);
178
/* Get the PID of the last process to do semop() on the semaphore */
180
IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
182
union semun dummy; /* for Solaris */
184
dummy.val = 0; /* unused */
186
/*
 * The result of the GETPID request is handed back to the caller unchanged;
 * judging whether it indicates failure is left to the caller.
 */
return semctl(semId, semNum, GETPID, dummy);
191
* Create a semaphore set with the given number of useful semaphores
192
* (an additional sema is actually allocated to serve as identifier).
193
* Dead Postgres sema sets are recycled if found, but we do not fail
194
* upon collision with non-Postgres sema sets.
196
* The idea here is to detect and re-use keys that may have been assigned
197
* by a crashed postmaster or backend.
199
static IpcSemaphoreId
200
IpcSemaphoreCreate(int numSems)
202
/*
 * Every create/lookup below asks for numSems + 1 semaphores: index numSems
 * is the extra semaphore used purely for identification.
 */
IpcSemaphoreId semId;
204
PGSemaphoreData mysema;
206
/* Loop till we find a free IPC key */
207
for (nextSemaKey++;; nextSemaKey++)
211
/* Try to create new semaphore set */
212
semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
214
break; /* successful create */
216
/* See if it looks to be leftover from a dead Postgres process */
217
/*
 * No IPC_CREAT here, so this only looks up a set that already exists under
 * the key.
 */
semId = semget(nextSemaKey, numSems + 1, 0);
219
continue; /* failed: must be some other app's */
220
if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
221
continue; /* sema belongs to a non-Postgres app */
224
* If the creator PID is my own PID or does not belong to any extant
225
* process, it's safe to zap it.
227
creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
229
continue; /* oops, GETPID failed */
230
if (creatorPID != getpid())
232
if (kill(creatorPID, 0) == 0 || errno != ESRCH)
233
continue; /* sema belongs to a live process */
237
* The sema set appears to be from a dead Postgres process, or from a
238
* previous cycle of life in this same process. Zap it, if possible.
239
* This probably shouldn't fail, but if it does, assume the sema set
240
* belongs to someone else after all, and continue quietly.
242
semun.val = 0; /* unused, but keep compiler quiet */
243
if (semctl(semId, 0, IPC_RMID, semun) < 0)
247
* Now try again to create the sema set.
249
semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
251
break; /* successful create */
254
* Can only get here if some other process managed to create the same
255
* sema key before we did. Let him have that one, loop around to try
261
* OK, we created a new sema set. Mark it as created by this process. We
262
* do this by setting the spare semaphore to PGSemaMagic-1 and then
263
* incrementing it with semop(). That leaves it with value PGSemaMagic
264
* and sempid referencing this process.
266
/*
 * mysema is a throwaway descriptor for the spare semaphore, filled in so
 * that the ordinary PGSemaphoreUnlock routine can perform the increment.
 */
IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1);
267
mysema.semId = semId;
268
mysema.semNum = numSems;
269
PGSemaphoreUnlock(&mysema);
276
* PGReserveSemaphores --- initialize semaphore support
278
* This is called during postmaster start or shared memory reinitialization.
279
* It should do whatever is needed to be able to support up to maxSemas
280
* subsequent PGSemaphoreCreate calls. Also, if any system resources
281
* are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
282
* callback to release them.
284
* The port number is passed for possible use as a key (for SysV, we use
285
* it to generate the starting semaphore key). In a standalone backend,
286
* zero will be passed.
288
* In the SysV implementation, we acquire semaphore sets on-demand; the
289
* maxSemas parameter is just used to size the array that keeps track of
290
* acquired sets for subsequent releasing.
293
PGReserveSemaphores(int maxSemas, int port)
295
/*
 * Ceiling division: the number of SEMAS_PER_SET-sized sets needed to hold
 * maxSemas semaphores.
 */
maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
296
/* The array of set IDs comes from malloc; running out of memory is a PANIC. */
mySemaSets = (IpcSemaphoreId *)
297
malloc(maxSemaSets * sizeof(IpcSemaphoreId));
298
if (mySemaSets == NULL)
299
elog(PANIC, "out of memory");
301
/*
 * Starting point for the key search.  IpcSemaphoreCreate increments
 * nextSemaKey before each attempt, so the first key tried is one above this.
 */
nextSemaKey = port * 1000;
302
nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */
304
/* Register ReleaseSemaphores as an on_shmem_exit callback (argument 0). */
on_shmem_exit(ReleaseSemaphores, 0);
308
* Release semaphores at shutdown or shmem reinitialization
310
* (called as an on_shmem_exit callback, hence funny argument list)
313
ReleaseSemaphores(int status, Datum arg)
317
/*
 * Remove every semaphore set recorded so far.  Neither status nor arg is
 * consulted by the statements shown here.
 */
for (i = 0; i < numSemaSets; i++)
318
IpcSemaphoreKill(mySemaSets[i]);
325
* Initialize a PGSemaphore structure to represent a sema with count 1
328
PGSemaphoreCreate(PGSemaphore sema)
330
/* Can't do this in a backend, because static state is postmaster's */
331
Assert(!IsUnderPostmaster);
333
/*
 * nextSemaNumber reaching SEMAS_PER_SET means the current set has no free
 * slot left.  PGReserveSemaphores starts it at that value so that the very
 * first call allocates a set.
 */
if (nextSemaNumber >= SEMAS_PER_SET)
335
/* Time to allocate another semaphore set */
336
/* maxSemaSets is the capacity of mySemaSets, so exceeding it is a PANIC. */
if (numSemaSets >= maxSemaSets)
337
elog(PANIC, "too many semaphores created");
338
mySemaSets[numSemaSets] = IpcSemaphoreCreate(SEMAS_PER_SET);
342
/* Assign the next free semaphore in the current set */
343
/*
 * NOTE(review): the read of mySemaSets[numSemaSets - 1] implies that
 * numSemaSets is advanced (and nextSemaNumber restarted) after a new set is
 * stored; those statements are not shown here -- confirm.
 */
sema->semId = mySemaSets[numSemaSets - 1];
344
sema->semNum = nextSemaNumber++;
345
/* Initialize it to count 1 */
346
IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
352
* Reset a previously-initialized PGSemaphore to have count 0
355
PGSemaphoreReset(PGSemaphore sema)
357
/*
 * The count is overwritten with 0 through IpcSemaphoreInitialize (a SETVAL
 * request); the current value is not read first.
 */
IpcSemaphoreInitialize(sema->semId, sema->semNum, 0);
363
* Lock a semaphore (decrement count), blocking if count would be < 0
366
PGSemaphoreLock(PGSemaphore sema, bool interruptOK)
371
/* Describe one operation on this semaphore: subtract one from its count. */
sops.sem_op = -1; /* decrement */
373
sops.sem_num = sema->semNum;
376
* Note: if errStatus is -1 and errno == EINTR then it means we returned
377
* from the operation prematurely because we were sent a signal. So we
378
* try and lock the semaphore again.
380
* Each time around the loop, we check for a cancel/die interrupt. On
381
* some platforms, if such an interrupt comes in while we are waiting, it
382
* will cause the semop() call to exit with errno == EINTR, allowing us to
383
* service the interrupt (if not in a critical section already) during the
384
* next loop iteration.
386
* Once we acquire the lock, we do NOT check for an interrupt before
387
* returning. The caller needs to be able to record ownership of the lock
388
* before any interrupt can be accepted.
390
* There is a window of a few instructions between CHECK_FOR_INTERRUPTS
391
* and entering the semop() call. If a cancel/die interrupt occurs in
392
* that window, we would fail to notice it until after we acquire the lock
393
* (or get another interrupt to escape the semop()). We can avoid this
394
* problem by temporarily setting ImmediateInterruptOK to true before we
395
* do CHECK_FOR_INTERRUPTS; then, a die() interrupt in this interval will
396
* execute directly. However, there is a huge pitfall: there is another
397
* window of a few instructions after the semop() before we are able to
398
* reset ImmediateInterruptOK. If an interrupt occurs then, we'll lose
399
* control, which means that the lock has been acquired but our caller did
400
* not get a chance to record the fact. Therefore, we only set
401
* ImmediateInterruptOK if the caller tells us it's OK to do so, ie, the
402
* caller does not need to record acquiring the lock. (This is currently
403
* true for lockmanager locks, since the process that granted us the lock
404
* did all the necessary state updates. It's not true for SysV semaphores
405
* used to implement LW locks or emulate spinlocks --- but the wait time
406
* for such locks should not be very long, anyway.)
408
* On some platforms, signals marked SA_RESTART (which is most, for us)
409
* will not interrupt the semop(); it will just keep waiting. Therefore
410
* it's necessary for cancel/die interrupts to be serviced directly by the
411
* signal handler. On these platforms the behavior is really the same
412
* whether the signal arrives just before the semop() begins, or while it
413
* is waiting. The loop on EINTR is thus important only for other types
418
/*
 * The interruptOK setting is in force only from here until semop() returns;
 * ImmediateInterruptOK is forced back to false immediately afterwards,
 * whatever the outcome of the call.
 */
ImmediateInterruptOK = interruptOK;
419
CHECK_FOR_INTERRUPTS();
420
errStatus = semop(sema->semId, &sops, 1);
421
ImmediateInterruptOK = false;
422
} while (errStatus < 0 && errno == EINTR);
425
/*
 * NOTE(review): the test guarding this FATAL report is not shown here;
 * presumably it fires only when errStatus is still negative -- confirm.
 */
elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
431
* Unlock a semaphore (increment count)
434
PGSemaphoreUnlock(PGSemaphore sema)
439
/* Describe one operation on this semaphore: add one to its count. */
sops.sem_op = 1; /* increment */
441
sops.sem_num = sema->semNum;
444
* Note: if errStatus is -1 and errno == EINTR then it means we returned
445
* from the operation prematurely because we were sent a signal. So we
446
* try and unlock the semaphore again. Not clear this can really happen,
447
* but might as well cope.
451
/* The same single-operation semop() is reissued while it fails with EINTR. */
errStatus = semop(sema->semId, &sops, 1);
452
} while (errStatus < 0 && errno == EINTR);
455
/*
 * NOTE(review): the test guarding this FATAL report is not shown here;
 * presumably it fires only when errStatus is still negative -- confirm.
 */
elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
461
* Lock a semaphore only if able to do so without blocking
464
PGSemaphoreTryLock(PGSemaphore sema)
469
/*
 * Same decrement as a blocking lock, but IPC_NOWAIT asks semop() to fail
 * instead of waiting when the count cannot be decremented.
 */
sops.sem_op = -1; /* decrement */
470
sops.sem_flg = IPC_NOWAIT; /* but don't block */
471
sops.sem_num = sema->semNum;
474
* Note: if errStatus is -1 and errno == EINTR then it means we returned
475
* from the operation prematurely because we were sent a signal. So we
476
* try and lock the semaphore again.
480
errStatus = semop(sema->semId, &sops, 1);
481
} while (errStatus < 0 && errno == EINTR);
485
/* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
488
return false; /* failed to lock it */
490
/*
 * EWOULDBLOCK gets its own test only where it is defined and is not simply
 * the same value as EAGAIN.
 */
#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
491
if (errno == EWOULDBLOCK)
492
return false; /* failed to lock it */
494
/* Otherwise we got trouble */
495
elog(FATAL, "semop(id=%d) failed: %m", sema->semId);