1
/* tac - concatenate and print files in reverse
2
Copyright (C) 1988-1991, 1995-2006, 2008 Free Software Foundation, Inc.
4
This program is free software: you can redistribute it and/or modify
5
it under the terms of the GNU General Public License as published by
6
the Free Software Foundation, either version 3 of the License, or
7
(at your option) any later version.
9
This program is distributed in the hope that it will be useful,
10
but WITHOUT ANY WARRANTY; without even the implied warranty of
11
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
GNU General Public License for more details.
14
You should have received a copy of the GNU General Public License
15
along with this program. If not, see <http://www.gnu.org/licenses/>. */
17
/* Written by Jay Lepreau (lepreau@cs.utah.edu).
18
GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
20
/* Copy each FILE, or the standard input if none are given or when a
21
FILE name of "-" is encountered, to the standard output with the
22
order of the records reversed. The records are separated by
23
instances of a string, or a newline if none is given. By default, the
24
separator string is attached to the end of the record that it
follows in the file.

Options:
28
-b, --before The separator is attached to the beginning
29
of the record that it precedes in the file.
30
-r, --regex The separator is a regular expression.
31
-s, --separator=separator Use SEPARATOR as the record separator.
33
To reverse a file byte by byte, use (in bash, ksh, or sh):
tac -r -s '.\|
' file */
41
#include <sys/types.h>
49
#include "safe-read.h"
53
/* The official name of this program (e.g., no `g' prefix). */
54
#define PROGRAM_NAME "tac"
57
proper_name ("Jay Lepreau"), \
58
proper_name ("David MacKenzie")
60
#if defined __MSDOS__ || defined _WIN32
/* Define this to non-zero on systems for which the regular mechanism
   (of unlinking an open file and expecting to be able to write, seek
   back to the beginning, then reread it) doesn't work.  E.g., on Windows
   and DOS systems.  */
# define DONT_UNLINK_WHILE_OPEN 1
#endif

#ifndef DEFAULT_TMPDIR
/* Fallback directory for temporary files.  A build may override it by
   defining DEFAULT_TMPDIR before this point.  */
# define DEFAULT_TMPDIR "/tmp"
#endif

/* The number of bytes per atomic read.  This is only the starting
   value: it is the INITIAL read size.  */
#define INITIAL_READSIZE 8192

/* The number of bytes per atomic write.  */
#define WRITESIZE 8192
79
/* The string that separates the records of the file. */
80
static char const *separator;
82
/* True if we have ever read standard input. */
83
static bool have_read_stdin = false;
85
/* If true, print `separator' along with the record preceding it
86
in the file; otherwise with the record following it. */
87
static bool separator_ends_record;
89
/* 0 if `separator' is to be matched as a regular expression;
90
otherwise, the length of `separator', used as a sentinel to
92
static size_t sentinel_length;
94
/* The length of a match with `separator'. If `sentinel_length' is 0,
95
`match_length' is computed every time a match succeeds;
96
otherwise, it is simply the length of `separator'. */
97
static size_t match_length;
99
/* The input buffer. */
100
static char *G_buffer;
102
/* The number of bytes to read at once into `buffer'. */
103
static size_t read_size;
105
/* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
106
The extra 2 bytes allow `past_end' to have a value beyond the
107
end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
108
static size_t G_buffer_size;
110
/* The compiled regular expression representing `separator'. */
111
static struct re_pattern_buffer compiled_separator;
112
static char compiled_separator_fastmap[UCHAR_MAX + 1];
113
static struct re_registers regs;
115
/* Long-option table handed to getopt_long.  Each visible entry pairs a
   long option name with the short option character it stands for; the
   last two entries come from the GETOPT_*_OPTION_DECL macros.
   NOTE(review): the opening brace and terminating entry are not visible
   in this view. */
static struct option const longopts[] =
117
{"before", no_argument, NULL, 'b'},
118
{"regex", no_argument, NULL, 'r'},
119
{"separator", required_argument, NULL, 's'},
120
{GETOPT_HELP_OPTION_DECL},
121
{GETOPT_VERSION_OPTION_DECL},
128
/* Body of the usage routine (its header is not visible in this view).
   When STATUS is not EXIT_SUCCESS, a one-line hint pointing at --help
   is written to stderr.  NOTE(review): the lines of help text below are
   presumably printed on the other branch; the branch structure is not
   visible here. */
if (status != EXIT_SUCCESS)
129
fprintf (stderr, _("Try `%s --help' for more information.\n"),
134
Usage: %s [OPTION]... [FILE]...\n\
138
Write each FILE to standard output, last line first.\n\
139
With no FILE, or when FILE is -, read standard input.\n\
143
Mandatory arguments to long options are mandatory for short options too.\n\
146
-b, --before attach the separator before instead of after\n\
147
-r, --regex interpret the separator as a regular expression\n\
148
-s, --separator=STRING use STRING as the separator instead of newline\n\
150
/* Write the HELP_OPTION_DESCRIPTION and VERSION_OPTION_DESCRIPTION texts
   to stdout, then call emit_bug_reporting_address. */
fputs (HELP_OPTION_DESCRIPTION, stdout);
151
fputs (VERSION_OPTION_DESCRIPTION, stdout);
152
emit_bug_reporting_address ();
157
/* Print the characters from START to PAST_END - 1.
158
If START is NULL, just flush the buffer. */
/* Output is staged in a static WRITESIZE-byte buffer that persists
   across calls and is handed to stdout with fwrite, so stdout receives
   full WRITESIZE-byte chunks except when the staged bytes are flushed. */
161
output (const char *start, const char *past_end)
163
static char buffer[WRITESIZE];
164
/* Number of staged bytes currently held in BUFFER. */
static size_t bytes_in_buffer = 0;
165
size_t bytes_to_add = past_end - start;
166
/* Free space left in BUFFER before it must be written out. */
size_t bytes_available = WRITESIZE - bytes_in_buffer;
170
/* Write out whatever is currently staged.  NOTE(review): presumably this
   is the START == NULL flush path described above; the guarding test is
   not visible in this view. */
fwrite (buffer, 1, bytes_in_buffer, stdout);
175
/* Write out as many full buffers as possible. */
176
while (bytes_to_add >= bytes_available)
178
/* Top the buffer up to exactly WRITESIZE bytes, step past the input
   just consumed, and write the full buffer. */
memcpy (buffer + bytes_in_buffer, start, bytes_available);
179
bytes_to_add -= bytes_available;
180
start += bytes_available;
181
fwrite (buffer, 1, WRITESIZE, stdout);
183
/* After a full write the whole buffer is available again. */
bytes_available = WRITESIZE;
186
/* The loop above ends with fewer bytes left than free space, so the
   remaining tail is staged for a later write. */
memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
187
bytes_in_buffer += bytes_to_add;
190
/* Print in reverse the file open on descriptor FD for reading FILE.
191
Return true if successful. */
/* INPUT_FD is the descriptor called FD above and must support lseek: the
   file is measured with a seek to its end and then read back toward its
   start.  FILE is used only to label diagnostics.  Completed records are
   handed to `output' as separators are found.
   NOTE(review): this view of the function is fragmentary; the bare
   numbers between statements mark where lines are not visible. */
194
tac_seekable (int input_fd, const char *file)
196
/* Pointer to the location in `G_buffer' where the search for
197
the next separator will begin. */
200
/* Pointer to one past the rightmost character in `G_buffer' that
201
has not been printed yet. */
204
/* Length of the record growing in `G_buffer'. */
205
size_t saved_record_size;
207
/* Offset in the file of the next read. */
210
/* True if `output' has not been called yet for any file.
211
Only used when the separator is attached to the preceding record. */
212
bool first_time = true;
213
char first_char = *separator; /* Speed optimization, non-regexp. */
214
char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
215
size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
217
/* Find the size of the input file. */
218
file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
220
return true; /* It's an empty file. */
222
/* Arrange for the first read to lop off enough to leave the rest of the
223
file a multiple of `read_size'. Since `read_size' can change, this may
224
not always hold during the program run, but since it usually will, leave
225
it here for i/o efficiency (page/sector boundaries and all that).
226
Note: the efficiency gain has not been verified. */
227
saved_record_size = file_pos % read_size;
228
if (saved_record_size == 0)
229
saved_record_size = read_size;
230
file_pos -= saved_record_size;
231
/* `file_pos' now points to the start of the last (probably partial) block
232
in the input file. */
234
if (lseek (input_fd, file_pos, SEEK_SET) < 0)
235
error (0, errno, _("%s: seek failed"), quotearg_colon (file));
237
if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
239
error (0, errno, _("%s: read error"), quotearg_colon (file));
243
match_start = past_end = G_buffer + saved_record_size;
244
/* For non-regexp search, move past impossible positions for a match. */
246
match_start -= match_length1;
250
/* Search backward from `match_start' - 1 to `G_buffer' for a match
251
with `separator'; for speed, use strncmp when `separator' is a fixed
string (`sentinel_length' != 0) rather than a regular expression.
253
If the match succeeds, set `match_start' to point to the start of
254
the match and `match_length' to the length of the match.
255
Otherwise, make `match_start' < `G_buffer'. */
256
if (sentinel_length == 0)
258
size_t i = match_start - G_buffer;
260
/* A non-positive RANGE makes re_search try start positions from
   i - 1 back toward the front of `G_buffer'. */
regoff_t range = 1 - ri;
264
error (EXIT_FAILURE, 0, _("record too large"));
267
|| ((ret = re_search (&compiled_separator, G_buffer,
268
i, i - 1, range, &regs))
270
match_start = G_buffer - 1;
273
error (EXIT_FAILURE, 0,
274
_("error in regular expression search"));
278
match_start = G_buffer + regs.start[0];
279
match_length = regs.end[0] - regs.start[0];
284
/* `match_length' is constant for non-regexp boundaries. */
/* Step backward one byte at a time: first compare the leading byte, and
   only then the rest of the separator.  NOTE(review): presumably relies
   on the copy of `separator' that main stores in front of `G_buffer' to
   stop the scan -- confirm. */
285
while (*--match_start != first_char
286
|| (match_length1 && strncmp (match_start + 1, separator1,
291
/* Check whether we backed off the front of `G_buffer' without finding
292
a match for `separator'. */
293
if (match_start < G_buffer)
297
/* Hit the beginning of the file; print the remaining record. */
298
output (G_buffer, past_end);
302
saved_record_size = past_end - G_buffer;
303
if (saved_record_size > read_size)
305
/* `G_buffer_size' is about twice `read_size', so since
306
we want to read in another `read_size' bytes before
307
the data already in `G_buffer', we need to increase
`G_buffer_size'. */
310
size_t offset = sentinel_length ? sentinel_length : 1;
311
ptrdiff_t match_start_offset = match_start - G_buffer;
312
ptrdiff_t past_end_offset = past_end - G_buffer;
313
size_t old_G_buffer_size = G_buffer_size;
316
G_buffer_size = read_size * 2 + sentinel_length + 2;
317
/* A smaller result means the size computation wrapped around. */
if (G_buffer_size < old_G_buffer_size)
319
/* The pointer handed to xrealloc is OFFSET bytes before `G_buffer',
   i.e. the start of the underlying allocation. */
newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
321
/* Adjust the pointers for the new buffer location. */
322
match_start = newbuffer + match_start_offset;
323
past_end = newbuffer + past_end_offset;
324
G_buffer = newbuffer;
327
/* Back up to the start of the next bufferfull of the file. */
328
if (file_pos >= read_size)
329
file_pos -= read_size;
332
read_size = file_pos;
335
if (lseek (input_fd, file_pos, SEEK_SET) < 0)
336
error (0, errno, _("%s: seek failed"), quotearg_colon (file));
338
/* Shift the pending record data right to make room for the new.
339
The source and destination regions probably overlap. */
340
memmove (G_buffer + read_size, G_buffer, saved_record_size);
341
past_end = G_buffer + read_size + saved_record_size;
342
/* For non-regexp searches, avoid unnecessary scanning. */
344
match_start = G_buffer + read_size;
346
match_start = past_end;
348
if (safe_read (input_fd, G_buffer, read_size) != read_size)
350
error (0, errno, _("%s: read error"), quotearg_colon (file));
356
/* Found a match of `separator'. */
357
if (separator_ends_record)
359
char *match_end = match_start + match_length;
361
/* If this match of `separator' isn't at the end of the
362
file, print the record. */
363
if (!first_time || match_end != past_end)
364
output (match_end, past_end);
365
past_end = match_end;
370
output (match_start, past_end);
371
past_end = match_start;
374
/* For non-regex matching, we can back up. */
375
if (sentinel_length > 0)
376
match_start -= match_length - 1;
381
#if DONT_UNLINK_WHILE_OPEN
383
/* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
384
Using atexit like this is wrong, since it can fail
385
when called e.g. 32 or more times.
386
But this isn't a big deal, since the code is used only on WOE/DOS
387
systems, and few people invoke tac on that many nonseekable files. */
389
/* Name and stream of the temporary file that unlink_tempfile acts on. */
static const char *file_to_remove;
390
static FILE *fp_to_close;
393
/* Close fp_to_close, then unlink file_to_remove.  Registered with atexit
   by record_or_unlink_tempfile. */
unlink_tempfile (void)
395
fclose (fp_to_close);
396
unlink (file_to_remove);
400
/* Variant compiled when DONT_UNLINK_WHILE_OPEN is nonzero: arranges for
   unlink_tempfile to run at exit.  NOTE(review): presumably FN and FP are
   first stored in file_to_remove and fp_to_close; those lines are not
   visible in this view. */
record_or_unlink_tempfile (char const *fn, FILE *fp)
406
atexit (unlink_tempfile);
413
/* Second definition of the same name, with FP marked ATTRIBUTE_UNUSED.
   NOTE(review): presumably the alternative branch that unlinks FN right
   away; the preprocessor branch lines and the body are not visible in
   this view. */
record_or_unlink_tempfile (char const *fn, FILE *fp ATTRIBUTE_UNUSED)
420
/* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
421
a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
422
and file name. Return true if successful. */
425
copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
427
/* The mkstemp template and the directory it names live in static
   storage; the template is built only while it is still NULL. */
static char *template = NULL;
428
static char const *tempdir;
433
if (template == NULL)
435
char const * const Template = "%s/tacXXXXXX";
436
/* Directory comes from the TMPDIR environment variable, with
   DEFAULT_TMPDIR as the fallback.  NOTE(review): the test guarding the
   fallback is not visible in this view. */
tempdir = getenv ("TMPDIR");
438
tempdir = DEFAULT_TMPDIR;
440
/* Subtract 2 for `%s' and add 1 for the trailing NUL byte. */
441
template = xmalloc (strlen (tempdir) + strlen (Template) - 2 + 1);
442
sprintf (template, Template, tempdir);
445
/* FIXME: there's a small window between a successful mkstemp call
446
and the unlink that's performed by record_or_unlink_tempfile.
447
If we're interrupted in that interval, this code fails to remove
448
the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
449
the window is much larger -- it extends to the atexit-called
unlink_tempfile.
451
FIXME: clean up upon fatal signal. Don't block them, in case
452
$TMPDIR is a remote file system. */
/* NOTE(review): mkstemp overwrites the trailing XXXXXX of its argument in
   place, and TEMPLATE is static and built only once.  Confirm that a
   second call (a second non-seekable input) still passes a string ending
   in XXXXXX. */
455
fd = mkstemp (template);
458
error (0, errno, _("cannot create temporary file in %s"),
463
/* Wrap the descriptor in a read/write stdio stream; the binary-mode
   string is chosen only where O_BINARY is nonzero. */
tmp = fdopen (fd, (O_BINARY ? "w+b" : "w+"));
466
error (0, errno, _("cannot open %s for writing"), quote (tempfile));
472
record_or_unlink_tempfile (tempfile, tmp);
476
/* Read up to read_size bytes into G_buffer and append them to TMP.
   NOTE(review): presumably repeated until end of input; the enclosing
   loop is not visible in this view. */
size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
479
if (bytes_read == SAFE_READ_ERROR)
481
error (0, errno, _("%s: read error"), quotearg_colon (file));
485
if (fwrite (G_buffer, 1, bytes_read, tmp) != bytes_read)
487
error (0, errno, _("%s: write error"), quotearg_colon (tempfile));
492
/* Push stdio-buffered data out to the file itself. */
if (fflush (tmp) != 0)
494
error (0, errno, _("%s: write error"), quotearg_colon (tempfile));
499
/* Hand the temporary file name back to the caller. */
*g_tempfile = tempfile;
507
/* Copy INPUT_FD to a temporary, then tac that file.
508
Return true if successful. */
/* FILE is passed through to copy_to_temp, which uses it to label read
   errors on the original input. */
511
tac_nonseekable (int input_fd, const char *file)
515
/* Short-circuit evaluation: tac_seekable runs only when the copy
   succeeded, and it reads the temporary file through the descriptor
   underlying the stream, under the temporary file name. */
return (copy_to_temp (&tmp_stream, &tmp_file, input_fd, file)
516
&& tac_seekable (fileno (tmp_stream), tmp_file));
519
/* Print FILE in reverse, copying it to a temporary
520
file first if it is not seekable.
521
Return true if successful. */
524
tac_file (const char *filename)
529
/* A FILENAME of - selects standard input. */
bool is_stdin = STREQ (filename, "-");
533
/* Record that standard input was used, and switch FILENAME to a
   translatable display name for diagnostics.  NOTE(review): presumably
   these lines run only when IS_STDIN is true; the guard is not visible
   in this view. */
have_read_stdin = true;
535
filename = _("standard input");
536
/* Where O_BINARY is nonzero, reopen stdin in binary mode unless it is
   a terminal. */
if (O_BINARY && ! isatty (STDIN_FILENO))
537
xfreopen (NULL, "rb", stdin);
541
fd = open (filename, O_RDONLY | O_BINARY);
544
error (0, errno, _("cannot open %s for reading"), quote (filename));
549
/* Probe seekability by seeking to the end of the file. */
file_size = lseek (fd, (off_t) 0, SEEK_END);
551
/* A failed seek or a terminal goes through the temporary-copy path;
   anything else is reversed in place. */
ok = (file_size < 0 || isatty (fd)
552
? tac_nonseekable (fd, filename)
553
: tac_seekable (fd, filename));
555
/* Descriptors other than standard input are closed here; a close
   failure is reported as a read error. */
if (!is_stdin && close (fd) != 0)
557
error (0, errno, _("%s: read error"), quotearg_colon (filename));
564
/* Program entry point: set up the locale and exit-time stdout handling,
   parse the options, prepare the separator matcher and the input buffer,
   reverse each file operand (or the default list when there is none),
   then flush the output and exit with a status that reflects whether
   every file succeeded.
   NOTE(review): this view of the function is fragmentary; the bare
   numbers between statements mark where lines are not visible. */
main (int argc, char **argv)
566
const char *error_message; /* Return value from re_compile_pattern. */
569
size_t half_buffer_size;
571
/* Initializer for file_list if no file-arguments
572
were specified on the command line. */
573
static char const *const default_file_list[] = {"-", NULL};
574
char const *const *file;
576
initialize_main (&argc, &argv);
577
set_program_name (argv[0]);
578
setlocale (LC_ALL, "");
579
bindtextdomain (PACKAGE, LOCALEDIR);
580
textdomain (PACKAGE);
582
atexit (close_stdout);
586
/* Start with the separator attached to the end of each record; the
   option handling below can clear this. */
separator_ends_record = true;
588
while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
593
separator_ends_record = false;
601
error (EXIT_FAILURE, 0, _("separator cannot be empty"));
603
case_GETOPT_HELP_CHAR;
604
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
606
usage (EXIT_FAILURE);
610
/* A sentinel_length of 0 selects regular-expression matching: set up
   the pattern buffer with its fastmap and compile the separator. */
if (sentinel_length == 0)
612
compiled_separator.buffer = NULL;
613
compiled_separator.allocated = 0;
614
compiled_separator.fastmap = compiled_separator_fastmap;
615
compiled_separator.translate = NULL;
616
error_message = re_compile_pattern (separator, strlen (separator),
617
&compiled_separator);
619
error (EXIT_FAILURE, 0, "%s", error_message);
622
/* Fixed-string separator: its length is both the sentinel length and
   the match length.  NOTE(review): presumably the alternative branch of
   the test above. */
match_length = sentinel_length = strlen (separator);
624
/* Start from INITIAL_READSIZE; the loop continues while the sentinel
   is at least half of read_size.  NOTE(review): the growth step and the
   action taken on the overflow test are not visible in this view. */
read_size = INITIAL_READSIZE;
625
while (sentinel_length >= read_size / 2)
627
if (SIZE_MAX / 2 < read_size)
631
/* Size the buffer as two halves of read_size + sentinel_length + 1
   bytes each.  The comparison chain detects size_t wraparound in either
   computation. */
half_buffer_size = read_size + sentinel_length + 1;
632
G_buffer_size = 2 * half_buffer_size;
633
if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
635
G_buffer = xmalloc (G_buffer_size);
638
/* Store a copy of the separator at the very front of the allocation and
   advance G_buffer past it, so G_buffer points at the data area that
   follows.  NOTE(review): presumably done only for a fixed-string
   separator; the guard is not visible in this view. */
strcpy (G_buffer, separator);
639
G_buffer += sentinel_length;
646
/* Operands after the options are the files to process; with none, use
   the default list. */
file = (optind < argc
647
? (char const *const *) &argv[optind]
648
: default_file_list);
650
/* Where O_BINARY is nonzero, reopen stdout in binary mode unless it is
   a terminal. */
if (O_BINARY && ! isatty (STDOUT_FILENO))
651
xfreopen (NULL, "wb", stdout);
656
/* Process every file in order; OK remains true only if all succeed. */
for (i = 0; file[i]; ++i)
657
ok &= tac_file (file[i]);
660
/* Flush the output buffer. */
661
output ((char *) NULL, (char *) NULL);
663
/* Standard input, if it was ever read, is closed only now; a failure
   to close it is fatal. */
if (have_read_stdin && close (STDIN_FILENO) < 0)
664
error (EXIT_FAILURE, errno, "-");
665
exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);