1
/*-------------------------------------------------------------------------
4
* Implement PGSemaphores using SysV semaphore facilities
7
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
8
* Portions Copyright (c) 1994, Regents of the University of California
11
* $PostgreSQL: pgsql/src/backend/port/sysv_sema.c,v 1.16 2004-12-31 22:00:29 pgsql Exp $
13
*-------------------------------------------------------------------------
27
#ifdef HAVE_KERNEL_OS_H
28
#include <kernel/OS.h>
31
#include "miscadmin.h"
32
#include "storage/ipc.h"
33
#include "storage/pg_sema.h"
36
#ifndef HAVE_UNION_SEMUN
41
unsigned short *array;
45
typedef key_t IpcSemaphoreKey; /* semaphore key passed to semget(2) */
46
typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */
49
* SEMAS_PER_SET is the number of useful semaphores in each semaphore set
50
* we allocate. It must be *less than* your kernel's SEMMSL (max semaphores
51
* per set) parameter, which is often around 25. (Less than, because we
52
* allocate one extra sema in each set for identification purposes.)
54
/*
 * Constants used by the semaphore-set allocation code in this file:
 *
 * SEMAS_PER_SET  number of usable semaphores PGSemaphoreCreate requests per
 *                set; IpcSemaphoreCreate asks semget() for one more than the
 *                count it is given.
 * IPCProtection  permission bits OR-ed into the semget() flags in
 *                InternalIpcSemaphoreCreate.
 * PGSemaMagic    value IpcSemaphoreCreate leaves in the extra semaphore of a
 *                set it created, and the value it compares against when it
 *                examines a pre-existing set.
 */
#define SEMAS_PER_SET 16
56
#define IPCProtection (0600) /* access/modify by user only */
58
#define PGSemaMagic 537 /* must be less than SEMVMX */
61
/*
 * File-private bookkeeping for the semaphore sets held by this process.
 * PGReserveSemaphores allocates mySemaSets and assigns maxSemaSets,
 * nextSemaKey and nextSemaNumber; PGSemaphoreCreate hands out slots and
 * stores newly created set IDs; ReleaseSemaphores walks the first
 * numSemaSets entries of mySemaSets.
 */
static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so
63
static int numSemaSets; /* number of sema sets acquired so far */
64
static int maxSemaSets; /* allocated size of mySemaSets array */
65
static IpcSemaphoreKey nextSemaKey; /* next key to try using */
66
static int nextSemaNumber; /* next free sem num in last sema set */
69
/*
 * Prototypes for the static helper routines defined later in this file.
 * All of them have internal linkage.
 */
static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
71
static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum,
73
static void IpcSemaphoreKill(IpcSemaphoreId semId);
74
static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum);
75
static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum);
76
static IpcSemaphoreId IpcSemaphoreCreate(int numSems);
77
static void ReleaseSemaphores(int status, Datum arg);
81
* InternalIpcSemaphoreCreate
83
* Attempt to create a new semaphore set with the specified key.
84
* Will fail (return -1) if such a set already exists.
86
* If we fail with a failure code other than collision-with-existing-set,
87
* print out an error and abort. Other types of errors suggest nonrecoverable problems.
91
/*
 * Ask the kernel for a brand-new semaphore set.
 *
 * semKey   key handed to semget()
 * numSems  number of semaphores requested in the set
 *
 * semget() is called with IPC_CREAT | IPC_EXCL plus IPCProtection, so a set
 * that already exists under semKey is never opened by this routine.  An
 * errno that looks like a key collision (the test below includes EEXIST and
 * EACCES) is not reported; any other failure is reported with the failed
 * call spelled out in the detail message and a hint about the kernel
 * SEMMNI / SEMMNS limits.
 */
InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems)
95
semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);
100
* Fail quietly if error indicates a collision with existing set.
101
* One would expect EEXIST, given that we said IPC_EXCL, but
102
* perhaps we could get a permission violation instead? Also,
103
* EIDRM might occur if an old set is slated for destruction but
106
if (errno == EEXIST || errno == EACCES
114
* Else complain and abort
117
(errmsg("could not create semaphores: %m"),
118
errdetail("Failed system call was semget(%lu, %d, 0%o).",
119
(unsigned long) semKey, numSems,
120
IPC_CREAT | IPC_EXCL | IPCProtection),
122
errhint("This error does *not* mean that you have run out of disk space.\n"
123
"It occurs when either the system limit for the maximum number of "
124
"semaphore sets (SEMMNI), or the system wide maximum number of "
125
"semaphores (SEMMNS), would be exceeded. You need to raise the "
126
"respective kernel parameter. Alternatively, reduce PostgreSQL's "
127
"consumption of semaphores by reducing its max_connections parameter "
129
"The PostgreSQL documentation contains more information about "
130
"configuring your system for PostgreSQL.",
138
* Initialize a semaphore to the specified value.
141
/*
 * Set one semaphore to a given count using semctl(SETVAL).
 *
 * semId   semaphore set ID
 * semNum  index of the semaphore within that set
 * value   count reported in the error message and hint below
 *         (NOTE(review): presumably also what is loaded into semun before
 *         the call; confirm)
 *
 * A negative semctl() result is reported together with a hint that the
 * kernel SEMVMX limit may be too small.
 */
IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
146
if (semctl(semId, semNum, SETVAL, semun) < 0)
148
(errmsg_internal("semctl(%d, %d, SETVAL, %d) failed: %m",
149
semId, semNum, value),
151
errhint("You possibly need to raise your kernel's SEMVMX value to be at least "
152
"%d. Look into the PostgreSQL documentation for details.",
157
* IpcSemaphoreKill(semId) - removes a semaphore set
160
/*
 * Remove the semaphore set semId with semctl(IPC_RMID).
 *
 * A failed removal is only written to the log at LOG level, so it is never
 * raised to the caller as an error.
 */
IpcSemaphoreKill(IpcSemaphoreId semId)
164
semun.val = 0; /* unused, but keep compiler quiet */
166
if (semctl(semId, 0, IPC_RMID, semun) < 0)
167
elog(LOG, "semctl(%d, 0, IPC_RMID, ...) failed: %m", semId);
170
/*
 * Get the current value (semval) of semaphore semNum in set semId.
 *
 * The semctl(GETVAL) result is returned as-is; no error check is made here,
 * so callers must cope with unexpected results themselves.  In this file
 * IpcSemaphoreCreate does so by comparing the result against PGSemaMagic.
 */
172
IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
174
union semun dummy; /* for Solaris */
176
dummy.val = 0; /* unused */
178
return semctl(semId, semNum, GETVAL, dummy);
181
/*
 * Get the PID of the last process to do semop() on semaphore semNum in set
 * semId.
 *
 * The semctl(GETPID) result is returned as-is with no error check here;
 * IpcSemaphoreCreate has its own branch for the case where GETPID failed.
 */
183
IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
185
union semun dummy; /* for Solaris */
187
dummy.val = 0; /* unused */
189
return semctl(semId, semNum, GETPID, dummy);
194
* Create a semaphore set with the given number of useful semaphores
195
* (an additional sema is actually allocated to serve as identifier).
196
* Dead Postgres sema sets are recycled if found, but we do not fail
197
* upon collision with non-Postgres sema sets.
199
* The idea here is to detect and re-use keys that may have been assigned
200
* by a crashed postmaster or backend.
202
/*
 * Obtain a semaphore set holding numSems usable semaphores plus one extra
 * semaphore (index numSems) that serves as an ownership marker.
 *
 * Outline of the key search, which advances nextSemaKey before every
 * attempt (including the first):
 *   1. Try InternalIpcSemaphoreCreate with numSems + 1 semaphores; on
 *      success the search ends.
 *   2. Otherwise open the existing set with semget() and skip the key when
 *      that fails or when the marker semaphore does not hold PGSemaMagic.
 *   3. Read the PID recorded on the marker semaphore; skip the key when
 *      that PID is another process for which kill(pid, 0) does not report
 *      ESRCH, i.e. one that may still be alive.
 *   4. Remove the stale set with semctl(IPC_RMID) and retry the create;
 *      on success the search ends, otherwise move on to the next key.
 *
 * Once a set has been created, its marker semaphore is initialized to
 * PGSemaMagic - 1 and then bumped once through PGSemaphoreUnlock, which
 * leaves both the magic value and this process as last operator on it.
 */
static IpcSemaphoreId
203
IpcSemaphoreCreate(int numSems)
205
IpcSemaphoreId semId;
207
PGSemaphoreData mysema;
209
/* Loop till we find a free IPC key */
210
for (nextSemaKey++;; nextSemaKey++)
214
/* Try to create new semaphore set */
215
semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
217
break; /* successful create */
219
/* See if it looks to be leftover from a dead Postgres process */
220
semId = semget(nextSemaKey, numSems + 1, 0);
222
continue; /* failed: must be some other app's */
223
if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
224
continue; /* sema belongs to a non-Postgres app */
227
* If the creator PID is my own PID or does not belong to any
228
* extant process, it's safe to zap it.
230
creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
232
continue; /* oops, GETPID failed */
233
if (creatorPID != getpid())
235
if (kill(creatorPID, 0) == 0 || errno != ESRCH)
236
continue; /* sema belongs to a live process */
240
* The sema set appears to be from a dead Postgres process, or
241
* from a previous cycle of life in this same process. Zap it, if
242
* possible. This probably shouldn't fail, but if it does, assume
243
* the sema set belongs to someone else after all, and continue
246
semun.val = 0; /* unused, but keep compiler quiet */
247
if (semctl(semId, 0, IPC_RMID, semun) < 0)
251
* Now try again to create the sema set.
253
semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
255
break; /* successful create */
258
* Can only get here if some other process managed to create the
259
* same sema key before we did. Let him have that one, loop
260
* around to try next key.
265
* OK, we created a new sema set. Mark it as created by this process.
266
* We do this by setting the spare semaphore to PGSemaMagic-1 and then
267
* incrementing it with semop(). That leaves it with value
268
* PGSemaMagic and sempid referencing this process.
270
IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1);
271
mysema.semId = semId;
272
mysema.semNum = numSems;
273
PGSemaphoreUnlock(&mysema);
280
* PGReserveSemaphores --- initialize semaphore support
282
* This is called during postmaster start or shared memory reinitialization.
283
* It should do whatever is needed to be able to support up to maxSemas
284
* subsequent PGSemaphoreCreate calls. Also, if any system resources
285
* are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
286
* callback to release them.
288
* The port number is passed for possible use as a key (for SysV, we use
289
* it to generate the starting semaphore key). In a standalone backend,
290
* zero will be passed.
292
* In the SysV implementation, we acquire semaphore sets on-demand; the
293
* maxSemas parameter is just used to size the array that keeps track of
294
* acquired sets for subsequent releasing.
297
/*
 * Prepare the bookkeeping needed for up to maxSemas later
 * PGSemaphoreCreate calls.
 *
 * maxSemas  upper bound on semaphores that will be created; converted to a
 *           number of sets by dividing by SEMAS_PER_SET and rounding up
 * port      multiplied by 1000 to form the starting point of the key search
 *           (IpcSemaphoreCreate increments the key before its first attempt)
 *
 * The set-ID array comes from malloc(); a NULL result is reported at PANIC.
 * nextSemaNumber starts out equal to SEMAS_PER_SET so that the first
 * PGSemaphoreCreate call takes its allocate-a-new-set branch.  Finally
 * ReleaseSemaphores is registered through on_shmem_exit.
 */
PGReserveSemaphores(int maxSemas, int port)
299
maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
300
mySemaSets = (IpcSemaphoreId *)
301
malloc(maxSemaSets * sizeof(IpcSemaphoreId));
302
if (mySemaSets == NULL)
303
elog(PANIC, "out of memory");
305
nextSemaKey = port * 1000;
306
nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st
309
on_shmem_exit(ReleaseSemaphores, 0);
313
* Release semaphores at shutdown or shmem reinitialization
315
* (called as an on_shmem_exit callback, hence funny argument list)
318
/*
 * Callback registered by PGReserveSemaphores via on_shmem_exit.
 *
 * Calls IpcSemaphoreKill on each of the first numSemaSets entries of
 * mySemaSets.  The status and arg parameters do not appear to be used by
 * this loop.
 */
ReleaseSemaphores(int status, Datum arg)
322
for (i = 0; i < numSemaSets; i++)
323
IpcSemaphoreKill(mySemaSets[i]);
330
* Initialize a PGSemaphore structure to represent a sema with count 1
333
/*
 * Fill in *sema with the next unused (semId, semNum) slot and set that
 * semaphore to count 1.
 *
 * When nextSemaNumber has reached SEMAS_PER_SET the current set is used up,
 * so a fresh set is requested from IpcSemaphoreCreate and its ID is stored
 * in mySemaSets; running past maxSemaSets is reported at PANIC.
 *
 * NOTE(review): the slot is then read from mySemaSets[numSemaSets - 1], so
 * numSemaSets is presumably advanced (and nextSemaNumber reset) right after
 * the new set is stored; confirm.
 */
PGSemaphoreCreate(PGSemaphore sema)
335
/* Can't do this in a backend, because static state is postmaster's */
336
Assert(!IsUnderPostmaster);
338
if (nextSemaNumber >= SEMAS_PER_SET)
340
/* Time to allocate another semaphore set */
341
if (numSemaSets >= maxSemaSets)
342
elog(PANIC, "too many semaphores created");
343
mySemaSets[numSemaSets] = IpcSemaphoreCreate(SEMAS_PER_SET);
347
/* Assign the next free semaphore in the current set */
348
sema->semId = mySemaSets[numSemaSets - 1];
349
sema->semNum = nextSemaNumber++;
350
/* Initialize it to count 1 */
351
IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
357
* Reset a previously-initialized PGSemaphore to have count 0
360
/*
 * Force the semaphore described by sema back to count 0 by delegating to
 * IpcSemaphoreInitialize with the set ID and index stored in *sema.
 */
PGSemaphoreReset(PGSemaphore sema)
362
IpcSemaphoreInitialize(sema->semId, sema->semNum, 0);
368
* Lock a semaphore (decrement count), blocking if count would be < 0
371
/*
 * Decrement the semaphore described by sema via semop() with sem_op = -1.
 *
 * sema         semaphore to lock (set ID and index are read from it)
 * interruptOK  copied into ImmediateInterruptOK for the duration of each
 *              semop() attempt; the flag is set back to false right after
 *              the call returns
 *
 * Each pass of the loop runs CHECK_FOR_INTERRUPTS and then semop(); the
 * loop repeats for as long as semop() fails with errno == EINTR.  The FATAL
 * report after the loop is for other semop() failures (NOTE(review):
 * presumably guarded by an errStatus check; confirm).
 */
PGSemaphoreLock(PGSemaphore sema, bool interruptOK)
376
sops.sem_op = -1; /* decrement */
378
sops.sem_num = sema->semNum;
381
* Note: if errStatus is -1 and errno == EINTR then it means we
382
* returned from the operation prematurely because we were sent a
383
* signal. So we try and lock the semaphore again.
385
* Each time around the loop, we check for a cancel/die interrupt. We
386
* assume that if such an interrupt comes in while we are waiting, it
387
* will cause the semop() call to exit with errno == EINTR, so that we
388
* will be able to service the interrupt (if not in a critical section
391
* Once we acquire the lock, we do NOT check for an interrupt before
392
* returning. The caller needs to be able to record ownership of the
393
* lock before any interrupt can be accepted.
395
* There is a window of a few instructions between CHECK_FOR_INTERRUPTS
396
* and entering the semop() call. If a cancel/die interrupt occurs in
397
* that window, we would fail to notice it until after we acquire the
398
* lock (or get another interrupt to escape the semop()). We can
399
* avoid this problem by temporarily setting ImmediateInterruptOK to
400
* true before we do CHECK_FOR_INTERRUPTS; then, a die() interrupt in
401
* this interval will execute directly. However, there is a huge
402
* pitfall: there is another window of a few instructions after the
403
* semop() before we are able to reset ImmediateInterruptOK. If an
404
* interrupt occurs then, we'll lose control, which means that the
405
* lock has been acquired but our caller did not get a chance to
406
* record the fact. Therefore, we only set ImmediateInterruptOK if the
407
* caller tells us it's OK to do so, ie, the caller does not need to
408
* record acquiring the lock. (This is currently true for lockmanager
409
* locks, since the process that granted us the lock did all the
410
* necessary state updates. It's not true for SysV semaphores used to
411
* implement LW locks or emulate spinlocks --- but the wait time for
412
* such locks should not be very long, anyway.)
416
ImmediateInterruptOK = interruptOK;
417
CHECK_FOR_INTERRUPTS();
418
errStatus = semop(sema->semId, &sops, 1);
419
ImmediateInterruptOK = false;
420
} while (errStatus < 0 && errno == EINTR);
423
elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
429
* Unlock a semaphore (increment count)
432
/*
 * Increment the semaphore described by sema via semop() with sem_op = 1.
 *
 * The call is repeated for as long as it fails with errno == EINTR.  The
 * FATAL report after the loop is for other semop() failures
 * (NOTE(review): presumably guarded by an errStatus check; confirm).
 * IpcSemaphoreCreate also uses this routine to stamp the marker semaphore
 * of a newly created set.
 */
PGSemaphoreUnlock(PGSemaphore sema)
437
sops.sem_op = 1; /* increment */
439
sops.sem_num = sema->semNum;
442
* Note: if errStatus is -1 and errno == EINTR then it means we
443
* returned from the operation prematurely because we were sent a
444
* signal. So we try and unlock the semaphore again. Not clear this
445
* can really happen, but might as well cope.
449
errStatus = semop(sema->semId, &sops, 1);
450
} while (errStatus < 0 && errno == EINTR);
453
elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
459
* Lock a semaphore only if able to do so without blocking
462
/*
 * Try to decrement the semaphore described by sema without waiting:
 * semop() is issued with sem_op = -1 and sem_flg = IPC_NOWAIT, and is
 * repeated for as long as it fails with errno == EINTR.
 *
 * Returns false when the semaphore could not be taken.  Per the note
 * below, EAGAIN or EWOULDBLOCK is the expected errno in that case;
 * EWOULDBLOCK is tested separately only when it is defined and differs
 * from EAGAIN.  The FATAL report at the end is for any other semop()
 * failure.
 */
PGSemaphoreTryLock(PGSemaphore sema)
467
sops.sem_op = -1; /* decrement */
468
sops.sem_flg = IPC_NOWAIT; /* but don't block */
469
sops.sem_num = sema->semNum;
472
* Note: if errStatus is -1 and errno == EINTR then it means we
473
* returned from the operation prematurely because we were sent a
474
* signal. So we try and lock the semaphore again.
478
errStatus = semop(sema->semId, &sops, 1);
479
} while (errStatus < 0 && errno == EINTR);
483
/* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
486
return false; /* failed to lock it */
488
#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
489
if (errno == EWOULDBLOCK)
490
return false; /* failed to lock it */
492
/* Otherwise we got trouble */
493
elog(FATAL, "semop(id=%d) failed: %m", sema->semId);