1
/*-------------------------------------------------------------------------
4
* A utility to "zero out" the xlog when it's corrupt beyond recovery.
5
* Can also rebuild pg_control if needed.
7
* The theory of operation is fairly simple:
8
* 1. Read the existing pg_control (which will include the last
9
* checkpoint record). If it is an old format then update to the current format.
11
* 2. If pg_control is corrupt, attempt to intuit reasonable values,
12
* by scanning the old xlog if necessary.
13
* 3. Modify pg_control to reflect a "shutdown" state with a checkpoint
14
* record at the start of xlog.
15
* 4. Flush the existing xlog files and write a new segment with
16
* just a checkpoint record in it. The new segment is positioned
17
* just past the end of the old xlog, so that existing LSNs in
18
* data pages will appear to be "in the past".
19
* This is all pretty straightforward except for the intuition part of step 2.
23
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
24
* Portions Copyright (c) 1994, Regents of the University of California
26
* $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.28 2004-12-31 22:03:11 pgsql Exp $
28
*-------------------------------------------------------------------------
43
#include "access/xlog.h"
44
#include "access/xlog_internal.h"
45
#include "catalog/catversion.h"
46
#include "catalog/pg_control.h"
51
/*
 * Shorthand used around every user-visible message in this file: the
 * string is passed through gettext() before being printed.
 */
#define _(x) gettext((x))
54
/*
 * File-scope state shared by the helpers below.  XLogDir and
 * ControlFilePath are filled in by main() from the data directory
 * argument; ControlFile holds the pg_control contents that were read or
 * guessed; newXlogId/newXlogSeg are set by RewriteControlFile() and then
 * used to name the new segment file.
 */
char XLogDir[MAXPGPATH]; /* not static, see xlog_internal.h */
55
static char ControlFilePath[MAXPGPATH];
57
static ControlFileData ControlFile; /* pg_control values */
58
static uint32 newXlogId,
59
newXlogSeg; /* ID/Segment of new XLOG segment */
60
static bool guessed = false; /* T if we had to guess at any values */
61
static const char *progname;
63
/*
 * Forward declarations of the file-local helpers.
 * NOTE(review): the parameter of PrintControlValues() is also called
 * "guessed", so inside that function it hides the file-scope flag above.
 */
static bool ReadControlFile(void);
64
static void GuessControlValues(void);
65
static void PrintControlValues(bool guessed);
66
static void RewriteControlFile(void);
67
static void KillExistingXLOG(void);
68
static void WriteEmptyXLOG(void);
69
static void usage(void);
73
/*
 * Program entry point.
 *
 * Visible flow: handle --help / --version given as the first argument,
 * parse the switches accepted by getopt string "fl:no:x:", build the
 * pg_xlog and pg_control paths from the data directory argument, refuse
 * to continue when a postmaster.pid lock file is found, read pg_control
 * (falling back to GuessControlValues() when ReadControlFile() returns
 * false), apply the -x / -o / -l overrides, and then either print the
 * values or rewrite pg_control.
 */
main(int argc, char *argv[])
77
bool noupdate = false;
78
TransactionId set_xid = 0;
80
uint32 minXlogTli = 0,
90
set_pglocale_pgservice(argv[0], "pg_resetxlog");
92
progname = get_progname(argv[0]);
96
/*
 * NOTE(review): argv[1] is dereferenced by the two tests below; the guard
 * that argc > 1 is not visible here -- confirm it is present.
 */
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
101
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
103
puts("pg_resetxlog (PostgreSQL) " PG_VERSION);
109
while ((c = getopt(argc, argv, "fl:no:x:")) != -1)
122
/*
 * -x: next transaction ID.  Base 0 lets strtoul() accept decimal, octal
 * (leading 0) and hexadecimal (leading 0x) input; the whole argument must
 * be consumed.
 * NOTE(review): no errno/ERANGE test is visible after any of the
 * strtoul() calls in this function -- confirm whether out-of-range input
 * is meant to be accepted.
 */
set_xid = strtoul(optarg, &endptr, 0);
123
if (endptr == optarg || *endptr != '\0')
125
fprintf(stderr, _("%s: invalid argument for option -x\n"), progname);
126
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
131
fprintf(stderr, _("%s: transaction ID (-x) must not be 0\n"), progname);
137
/* -o: next OID, parsed with the same rules as -x. */
set_oid = strtoul(optarg, &endptr, 0);
138
if (endptr == optarg || *endptr != '\0')
140
fprintf(stderr, _("%s: invalid argument for option -o\n"), progname);
141
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
146
fprintf(stderr, _("%s: OID (-o) must not be 0\n"), progname);
152
/*
 * -l: three numbers separated by commas (timeline ID, log file ID,
 * segment).  Each field must be non-empty; the first two must be followed
 * by ',' and the third must end the argument.
 */
minXlogTli = strtoul(optarg, &endptr, 0);
153
if (endptr == optarg || *endptr != ',')
155
fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
156
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
159
minXlogId = strtoul(endptr + 1, &endptr2, 0);
160
if (endptr2 == endptr + 1 || *endptr2 != ',')
162
fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
163
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
166
minXlogSeg = strtoul(endptr2 + 1, &endptr3, 0);
167
if (endptr3 == endptr2 + 1 || *endptr3 != '\0')
169
fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
170
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
176
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
183
fprintf(stderr, _("%s: no data directory specified\n"), progname);
184
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
189
* Don't allow pg_resetxlog to be run as root, to avoid
190
* overwriting the ownership of files in the data directory. We
191
* need only check for root -- any other user won't have
192
* sufficient permissions to modify files in the data directory.
195
#ifndef __BEOS__ /* no root check on BeOS */
198
fprintf(stderr, _("%s: cannot be executed by \"root\"\n"),
200
fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n"),
207
/*
 * The first non-option argument is the data directory; the XLOG directory
 * and the control file path are derived from it.
 */
DataDir = argv[optind];
208
snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir);
209
snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);
212
* Check for a postmaster lock file --- if there is one, refuse to
213
* proceed, on grounds we might be interfering with a live
216
snprintf(path, MAXPGPATH, "%s/postmaster.pid", DataDir);
218
if ((fd = open(path, O_RDONLY)) < 0)
222
fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"), progname, path, strerror(errno));
228
fprintf(stderr, _("%s: lock file \"%s\" exists\n"
229
"Is a server running? If not, delete the lock file and try again.\n"),
235
* Attempt to read the existing pg_control file
237
if (!ReadControlFile())
238
GuessControlValues();
241
* Adjust fields if required by switches. (Do this now so that
242
* printout, if any, includes these values.)
245
ControlFile.checkPointCopy.nextXid = set_xid;
248
ControlFile.checkPointCopy.nextOid = set_oid;
250
/*
 * The -l values act as minimums: the timeline and the (logId, logSeg)
 * pair are only replaced when the requested value is strictly greater
 * than what pg_control already holds.
 */
if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
251
ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
253
if (minXlogId > ControlFile.logId ||
254
(minXlogId == ControlFile.logId &&
255
minXlogSeg > ControlFile.logSeg))
257
ControlFile.logId = minXlogId;
258
ControlFile.logSeg = minXlogSeg;
262
* If we had to guess anything, and -f was not given, just print the
263
* guessed values and exit. Also print if -n is given.
265
if ((guessed && !force) || noupdate)
267
PrintControlValues(guessed);
270
printf(_("\nIf these values seem acceptable, use -f to force reset.\n"));
278
* Don't reset from a dirty pg_control without -f, either.
280
if (ControlFile.state != DB_SHUTDOWNED && !force)
282
printf(_("The database server was not shut down cleanly.\n"
283
"Resetting the transaction log may cause data to be lost.\n"
284
"If you want to proceed anyway, use -f to force reset.\n"));
289
* Else, do the dirty deed.
291
RewriteControlFile();
295
printf(_("Transaction log reset\n"));
301
* Try to read the existing pg_control file.
303
* This routine is also responsible for updating old pg_control versions
304
* to the current format. (Currently we don't do anything of the sort.)
307
/*
 * Opens ControlFilePath read-only and reads up to BLCKSZ bytes of it into
 * a malloc'd buffer.  The contents are considered only when at least
 * sizeof(ControlFileData) bytes were read and the stored
 * pg_control_version equals PG_CONTROL_VERSION.  In that case a CRC is
 * computed over the bytes following the first sizeof(crc64) bytes and
 * compared with the stored crc; the buffer is copied into ControlFile
 * both when the CRC matches and when it does not (the latter after a
 * warning).  Anything else is reported as broken or of unknown version.
 *
 * The caller in main() falls back to GuessControlValues() when this
 * function returns false.
 *
 * NOTE(review): this open() passes O_RDONLY without PG_BINARY, whereas
 * the open() in RewriteControlFile() includes PG_BINARY -- confirm that
 * the difference is intended.
 */
ReadControlFile(void)
314
if ((fd = open(ControlFilePath, O_RDONLY)) < 0)
317
* If pg_control is not there at all, or we can't read it, the
318
* odds are we've been handed a bad DataDir path, so give up. User
319
* can do "touch pg_control" to force us to proceed.
321
fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"),
322
progname, ControlFilePath, strerror(errno));
324
fprintf(stderr, _("If you are sure the data directory path is correct, execute\n"
331
/* Use malloc to ensure we have a maxaligned buffer */
332
/*
 * NOTE(review): no NULL check on the malloc() result is visible before
 * the buffer is handed to read() -- confirm one exists or add it.
 */
buffer = (char *) malloc(BLCKSZ);
334
len = read(fd, buffer, BLCKSZ);
337
fprintf(stderr, _("%s: could not read file \"%s\": %s\n"),
338
progname, ControlFilePath, strerror(errno));
343
if (len >= sizeof(ControlFileData) &&
344
((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
349
buffer + sizeof(crc64),
350
sizeof(ControlFileData) - sizeof(crc64));
353
if (EQ_CRC64(crc, ((ControlFileData *) buffer)->crc))
356
memcpy(&ControlFile, buffer, sizeof(ControlFile));
360
fprintf(stderr, _("%s: pg_control exists but has invalid CRC; proceed with caution\n"),
362
/* We will use the data anyway, but treat it as guessed. */
363
memcpy(&ControlFile, buffer, sizeof(ControlFile));
368
/* Looks like it's a mess. */
369
fprintf(stderr, _("%s: pg_control exists but is broken or unknown version; ignoring it\n"),
376
* Guess at pg_control values when we can't read the old ones.
379
/*
 * Fills ControlFile with a default set of values: the struct is zeroed,
 * the version numbers and size-related fields are taken from the
 * compile-time constants of this build, a fresh system identifier is
 * derived from the current time, the checkpoint is placed at offset
 * SizeOfXLogLongPHD of log file 0 on timeline 1, the state is set to
 * DB_SHUTDOWNED, and the collation/ctype locale names are taken from
 * setlocale().
 */
GuessControlValues(void)
381
uint64 sysidentifier;
386
* Set up a completely default set of pg_control values.
389
memset(&ControlFile, 0, sizeof(ControlFile));
391
ControlFile.pg_control_version = PG_CONTROL_VERSION;
392
ControlFile.catalog_version_no = CATALOG_VERSION_NO;
395
* Create a new unique installation identifier, since we can no longer
396
* use any old XLOG records. See notes in xlog.c about the algorithm.
398
gettimeofday(&tv, NULL);
399
/*
 * Upper 32 bits: seconds since the epoch.  Lower 32 bits: the seconds
 * value OR-ed with the microseconds value, truncated to 32 bits.
 */
sysidentifier = ((uint64) tv.tv_sec) << 32;
400
sysidentifier |= (uint32) (tv.tv_sec | tv.tv_usec);
402
ControlFile.system_identifier = sysidentifier;
404
ControlFile.checkPointCopy.redo.xlogid = 0;
405
ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD;
406
ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
407
ControlFile.checkPointCopy.ThisTimeLineID = 1;
408
/*
 * NOTE(review): 514 is a hard-coded starting XID that the original author
 * marked XXX; where the value comes from is not visible here.
 */
ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
409
ControlFile.checkPointCopy.nextOid = BootstrapObjectIdData;
410
ControlFile.checkPointCopy.time = time(NULL);
412
ControlFile.state = DB_SHUTDOWNED;
413
ControlFile.time = time(NULL);
414
ControlFile.logId = 0;
415
ControlFile.logSeg = 1;
416
ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
418
ControlFile.blcksz = BLCKSZ;
419
ControlFile.relseg_size = RELSEG_SIZE;
420
ControlFile.xlog_seg_size = XLOG_SEG_SIZE;
421
ControlFile.nameDataLen = NAMEDATALEN;
422
ControlFile.funcMaxArgs = FUNC_MAX_ARGS;
423
#ifdef HAVE_INT64_TIMESTAMP
424
ControlFile.enableIntTimes = TRUE;
426
ControlFile.enableIntTimes = FALSE;
428
ControlFile.localeBuflen = LOCALE_NAME_BUFLEN;
430
/*
 * Passing "" to setlocale() selects the locale from the environment of
 * the user running this program; the returned name is what gets recorded.
 */
localeptr = setlocale(LC_COLLATE, "");
433
fprintf(stderr, _("%s: invalid LC_COLLATE setting\n"), progname);
436
StrNCpy(ControlFile.lc_collate, localeptr, LOCALE_NAME_BUFLEN);
437
localeptr = setlocale(LC_CTYPE, "");
440
fprintf(stderr, _("%s: invalid LC_CTYPE setting\n"), progname);
443
StrNCpy(ControlFile.lc_ctype, localeptr, LOCALE_NAME_BUFLEN);
446
* XXX eventually, should try to grovel through old XLOG to develop
447
* more accurate values for TimeLineID, nextXID, and nextOID.
453
* Print the guessed pg_control values when we had to guess.
455
* NB: this display should be just those fields that will not be
456
* reset by RewriteControlFile().
459
/*
 * Prints one of two headings ("Guessed pg_control values:" or
 * "pg_control values:"; presumably chosen by the guessed argument -- the
 * test itself is not visible here) followed by one line per ControlFile
 * field listed below.
 *
 * guessed: passed by main() as the file-scope flag of the same name; the
 * parameter hides that flag inside this function.
 */
PrintControlValues(bool guessed)
461
/*
 * A 64-bit unsigned value needs at most 20 decimal digits plus the
 * terminator, so 32 bytes is enough (assumes UINT64_FORMAT prints plain
 * decimal -- TODO confirm).
 */
char sysident_str[32];
464
printf(_("Guessed pg_control values:\n\n"));
466
printf(_("pg_control values:\n\n"));
469
* Format system_identifier separately to keep platform-dependent
470
* format code out of the translatable message string.
472
snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
473
ControlFile.system_identifier);
475
printf(_("pg_control version number: %u\n"), ControlFile.pg_control_version);
476
printf(_("Catalog version number: %u\n"), ControlFile.catalog_version_no);
477
printf(_("Database system identifier: %s\n"), sysident_str);
478
printf(_("Current log file ID: %u\n"), ControlFile.logId);
479
printf(_("Next log file segment: %u\n"), ControlFile.logSeg);
480
printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID);
481
printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid);
482
printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid);
483
printf(_("Database block size: %u\n"), ControlFile.blcksz);
484
printf(_("Blocks per segment of large relation: %u\n"), ControlFile.relseg_size);
485
printf(_("Maximum length of identifiers: %u\n"), ControlFile.nameDataLen);
486
printf(_("Maximum number of function arguments: %u\n"), ControlFile.funcMaxArgs);
487
printf(_("Date/time type storage: %s\n"),
488
(ControlFile.enableIntTimes ? _("64-bit integers") : _("floating-point numbers")));
489
printf(_("Maximum length of locale name: %u\n"), ControlFile.localeBuflen);
490
printf(_("LC_COLLATE: %s\n"), ControlFile.lc_collate);
491
printf(_("LC_CTYPE: %s\n"), ControlFile.lc_ctype);
496
* Write out the new pg_control file.
499
/*
 * Updates ControlFile in memory and writes it out as a new pg_control.
 *
 * Steps visible below:
 *  - pick the new segment position (newXlogId/newXlogSeg) from the
 *    current logId/logSeg, rescaled to this build's XLogSegSize and then
 *    advanced with NextLogSeg();
 *  - point the checkpoint (redo, undo and checkPoint) at offset
 *    SizeOfXLogLongPHD inside that segment, clear prevCheckPoint, set the
 *    state to DB_SHUTDOWNED and refresh both time stamps;
 *  - compute the CRC over the struct, skipping its first sizeof(crc64)
 *    bytes;
 *  - copy the struct into a zero-filled BLCKSZ buffer, unlink the old
 *    file, create a new one with O_CREAT | O_EXCL, write the buffer and
 *    report create / write / fsync failures.
 *
 * NOTE(review): the return value of unlink() is not examined, and the old
 * file is removed before the new one is created, so a failure in between
 * leaves no pg_control at all -- confirm that this is acceptable.
 */
RewriteControlFile(void)
502
char buffer[BLCKSZ]; /* need not be aligned */
505
* Adjust fields as needed to force an empty XLOG starting at the next
508
newXlogId = ControlFile.logId;
509
newXlogSeg = ControlFile.logSeg;
511
/*
 * The next two statements turn the segment number into a byte offset
 * using the recorded segment size, then divide by the current
 * XLogSegSize rounding up.
 * NOTE(review): newXlogSeg is a uint32, so this relies on the product
 * not overflowing 32 bits -- confirm for untrusted xlog_seg_size values.
 */
/* adjust in case we are changing segment size */
512
newXlogSeg *= ControlFile.xlog_seg_size;
513
newXlogSeg = (newXlogSeg + XLogSegSize - 1) / XLogSegSize;
515
/* be sure we wrap around correctly at end of a logfile */
516
NextLogSeg(newXlogId, newXlogSeg);
518
/* Now we can force the recorded xlog seg size to the right thing. */
519
ControlFile.xlog_seg_size = XLogSegSize;
521
ControlFile.checkPointCopy.redo.xlogid = newXlogId;
522
ControlFile.checkPointCopy.redo.xrecoff =
523
newXlogSeg * XLogSegSize + SizeOfXLogLongPHD;
524
ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
525
ControlFile.checkPointCopy.time = time(NULL);
527
ControlFile.state = DB_SHUTDOWNED;
528
ControlFile.time = time(NULL);
529
ControlFile.logId = newXlogId;
530
ControlFile.logSeg = newXlogSeg + 1;
531
ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
532
ControlFile.prevCheckPoint.xlogid = 0;
533
ControlFile.prevCheckPoint.xrecoff = 0;
535
/* Contents are protected with a CRC */
536
INIT_CRC64(ControlFile.crc);
537
COMP_CRC64(ControlFile.crc,
538
(char *) &ControlFile + sizeof(crc64),
539
sizeof(ControlFileData) - sizeof(crc64));
540
FIN_CRC64(ControlFile.crc);
543
* We write out BLCKSZ bytes into pg_control, zero-padding the excess
544
* over sizeof(ControlFileData). This reduces the odds of
545
* premature-EOF errors when reading pg_control. We'll still fail
546
* when we check the contents of the file, but hopefully with a more
547
* specific error than "couldn't read pg_control".
549
if (sizeof(ControlFileData) > BLCKSZ)
552
_("%s: internal error -- sizeof(ControlFileData) is too large ... fix xlog.c\n"),
557
memset(buffer, 0, BLCKSZ);
558
memcpy(buffer, &ControlFile, sizeof(ControlFileData));
560
unlink(ControlFilePath);
562
fd = open(ControlFilePath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR);
565
fprintf(stderr, _("%s: could not create pg_control file: %s\n"),
566
progname, strerror(errno));
571
if (write(fd, buffer, BLCKSZ) != BLCKSZ)
573
/* if write didn't set errno, assume problem is no disk space */
576
fprintf(stderr, _("%s: could not write pg_control file: %s\n"),
577
progname, strerror(errno));
583
fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
592
* Remove existing XLOG files
595
/*
 * Scans the directory named by XLogDir and unlinks every entry whose
 * name qualifies as a segment file (see the test inside the loop).
 * Failures to open the directory, to delete a file, or to read the
 * directory are reported on stderr together with strerror(errno).
 */
KillExistingXLOG(void)
599
char path[MAXPGPATH];
601
xldir = opendir(XLogDir);
604
fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
605
progname, XLogDir, strerror(errno));
610
while ((xlde = readdir(xldir)) != NULL)
612
/*
 * Only names that are exactly 24 characters long and consist solely of
 * the characters 0-9 and A-F are deleted; everything else in the
 * directory is left alone.
 */
if (strlen(xlde->d_name) == 24 &&
613
strspn(xlde->d_name, "0123456789ABCDEF") == 24)
615
snprintf(path, MAXPGPATH, "%s/%s", XLogDir, xlde->d_name);
616
if (unlink(path) < 0)
618
fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"),
619
progname, path, strerror(errno));
628
* This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but
629
* not in released version
631
if (GetLastError() == ERROR_NO_MORE_FILES)
637
fprintf(stderr, _("%s: could not read from directory \"%s\": %s\n"),
638
progname, XLogDir, strerror(errno));
646
* Write an empty XLOG file, containing only the checkpoint record
647
* already set up in ControlFile.
654
/*
 * NOTE(review): the signature line is not visible here; judging by the
 * prototype order near the top of the file these statements presumably
 * form the body of WriteEmptyXLOG() -- confirm.
 *
 * The visible statements build one BLCKSZ page holding a long XLOG page
 * header followed by a single shutdown-checkpoint record whose payload is
 * ControlFile.checkPointCopy, write that page to a newly created file
 * named from the timeline and newXlogId/newXlogSeg, and then pad the
 * file with zero-filled pages until XLogSegSize bytes have been written.
 */
XLogLongPageHeader longpage;
657
char path[MAXPGPATH];
661
/* Use malloc() to ensure buffer is MAXALIGNED */
662
/*
 * NOTE(review): no NULL check on the malloc() result is visible before
 * the buffer is passed to memset() -- confirm one exists or add it.
 */
buffer = (char *) malloc(BLCKSZ);
663
page = (XLogPageHeader) buffer;
664
memset(buffer, 0, BLCKSZ);
666
/* Set up the XLOG page header */
667
page->xlp_magic = XLOG_PAGE_MAGIC;
668
page->xlp_info = XLP_LONG_HEADER;
669
page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
670
page->xlp_pageaddr.xlogid =
671
ControlFile.checkPointCopy.redo.xlogid;
672
/* The page starts SizeOfXLogLongPHD bytes before the redo pointer. */
page->xlp_pageaddr.xrecoff =
673
ControlFile.checkPointCopy.redo.xrecoff - SizeOfXLogLongPHD;
674
longpage = (XLogLongPageHeader) page;
675
longpage->xlp_sysid = ControlFile.system_identifier;
676
longpage->xlp_seg_size = XLogSegSize;
678
/* Insert the initial checkpoint record */
679
record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
680
record->xl_prev.xlogid = 0;
681
record->xl_prev.xrecoff = 0;
682
record->xl_xid = InvalidTransactionId;
683
record->xl_len = sizeof(CheckPoint);
684
record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
685
record->xl_rmid = RM_XLOG_ID;
686
/*
 * The record CRC computed below covers the checkpoint payload first and
 * then the record header, skipping the header's first sizeof(crc64)
 * bytes.
 */
memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
690
COMP_CRC64(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
691
COMP_CRC64(crc, (char *) record + sizeof(crc64),
692
SizeOfXLogRecord - sizeof(crc64));
694
record->xl_crc = crc;
696
/* Write the first page */
697
XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID,
698
newXlogId, newXlogSeg);
702
fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
706
fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
707
progname, path, strerror(errno));
712
if (write(fd, buffer, BLCKSZ) != BLCKSZ)
714
/* if write didn't set errno, assume problem is no disk space */
717
fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
718
progname, path, strerror(errno));
722
/* Fill the rest of the file with zeroes */
723
memset(buffer, 0, BLCKSZ);
724
/* Counting starts at BLCKSZ because the first page is already written. */
for (nbytes = BLCKSZ; nbytes < XLogSegSize; nbytes += BLCKSZ)
727
if (write(fd, buffer, BLCKSZ) != BLCKSZ)
731
fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
732
progname, path, strerror(errno));
739
fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
750
/*
 * NOTE(review): the signature line is not visible here; these statements
 * presumably form the body of usage() -- confirm.
 *
 * Prints the help text to stdout: a one-line description, the invocation
 * synopsis, one line per option, and the bug-report address.  progname
 * is substituted into the first two messages.
 */
printf(_("%s resets the PostgreSQL transaction log.\n\n"), progname);
751
printf(_("Usage:\n %s [OPTION]... DATADIR\n\n"), progname);
752
printf(_("Options:\n"));
753
printf(_(" -f force update to be done\n"));
754
printf(_(" -l TLI,FILE,SEG force minimum WAL starting location for new transaction log\n"));
755
printf(_(" -n no update, just show extracted control values (for testing)\n"));
756
printf(_(" -o OID set next OID\n"));
757
printf(_(" -x XID set next transaction ID\n"));
758
printf(_(" --help show this help, then exit\n"));
759
printf(_(" --version output version information, then exit\n"));
760
printf(_("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"));