1
/* cmp - compare two files byte by byte

   Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
   2002 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   See the GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
30
#include <hard-locale.h>
36
#if defined LC_MESSAGES && ENABLE_NLS
37
# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
39
# define hard_locale_LC_MESSAGES 0
42
static char const authorship_msgid[] =
43
N_("Written by Torbjorn Granlund and David MacKenzie.");
45
static char const copyright_string[] =
46
"Copyright (C) 2002 Free Software Foundation, Inc.";
48
extern char const version_string[];
50
static int cmp (void);
51
static off_t file_position (int);
52
static size_t block_compare (word const *, word const *);
53
static size_t block_compare_and_count (word const *, word const *, off_t *);
54
static void sprintc (char *, unsigned char);
56
/* Name under which this program was invoked. */
59
/* Filenames of the compared files. */
60
static char const *file[2];
62
/* File descriptors of the files. */
63
static int file_desc[2];
65
/* Status of the files. */
66
static struct stat stat_buf[2];
68
/* Read buffers for the files. */
69
static word *buffer[2];
71
/* Optimal block size for the files. */
72
static size_t buf_size;
74
/* Initial prefix to ignore for each file. */
75
static off_t ignore_initial[2];
77
/* Number of bytes to compare. */
78
static uintmax_t bytes = UINTMAX_MAX;
81
static enum comparison_type
83
type_first_diff, /* Print the first difference. */
84
type_all_diffs, /* Print all differences. */
85
type_status /* Exit status only. */
88
/* If nonzero, print values of bytes quoted like cat -t does. */
89
static bool opt_print_bytes;
91
/* Values for long options that do not have single-letter equivalents. */
94
HELP_OPTION = CHAR_MAX + 1
97
static struct option const long_options[] =
99
{"print-bytes", 0, 0, 'b'},
100
{"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
101
{"ignore-initial", 1, 0, 'i'},
102
{"verbose", 0, 0, 'l'},
103
{"bytes", 1, 0, 'n'},
104
{"silent", 0, 0, 's'},
105
{"quiet", 0, 0, 's'},
106
{"version", 0, 0, 'v'},
107
{"help", 0, 0, HELP_OPTION},
111
/* Report a usage error and never return.  The first argument is a
   message template that the definition translates with _() before
   use; the second is the operand that template formats.  The
   definition ends by calling error with status EXIT_TROUBLE after
   suggesting `--help'.  */
static void try_help (char const *, char const *) __attribute__((noreturn));
113
try_help (char const *reason_msgid, char const *operand)
116
error (0, 0, _(reason_msgid), operand);
117
error (EXIT_TROUBLE, 0,
118
_("Try `%s --help' for more information."), program_name);
122
/* Suffix characters handed to xstrtoumax as its valid-suffix list when
   parsing the --ignore-initial and --bytes operands.  */
static char const valid_suffixes[] = "kKMGTPEZY0";
124
/* Parse an operand *ARGPTR of --ignore-initial, updating *ARGPTR to
   point after the operand.  If DELIMITER is nonzero, the operand may
   be followed by DELIMITER; otherwise it must be null-terminated.  */
128
parse_ignore_initial (char **argptr, char delimiter)
132
char const *arg = *argptr;
133
strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
134
if (! (e == LONGINT_OK
135
|| (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
136
|| (o = val) < 0 || o != val || val == UINTMAX_MAX)
137
try_help ("invalid --ignore-initial value `%s'", arg);
141
/* Specify the output format. */
143
specify_comparison_type (enum comparison_type t)
146
try_help ("options -l and -s are incompatible", 0);
154
error (EXIT_TROUBLE, 0, "%s", _("write failed"));
155
else if (fclose (stdout) != 0)
156
error (EXIT_TROUBLE, errno, "%s", _("standard output"));
159
static char const * const option_help_msgid[] = {
160
N_("-b --print-bytes Print differing bytes."),
161
N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."),
162
N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"),
163
N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
164
N_("-l --verbose Output byte numbers and values of all differing bytes."),
165
N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."),
166
N_("-s --quiet --silent Output nothing; yield exit status only."),
167
N_("-v --version Output version info."),
168
N_("--help Output this help."),
175
char const * const *p;
177
printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
179
printf ("%s\n\n", _("Compare two files byte by byte."));
180
for (p = option_help_msgid; *p; p++)
181
printf (" %s\n", _(*p));
182
printf ("\n%s\n%s\n\n%s\n\n%s\n",
183
_("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
184
_("SKIP values may be followed by the following multiplicative suffixes:\n\
185
kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
186
GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
187
_("If a FILE is `-' or missing, read standard input."),
188
_("Report bugs to <bug-gnu-utils@gnu.org>."));
192
main (int argc, char **argv)
194
int c, f, exit_status;
195
size_t words_per_buffer;
197
exit_failure = EXIT_TROUBLE;
198
initialize_main (&argc, &argv);
199
program_name = argv[0];
200
setlocale (LC_ALL, "");
201
bindtextdomain (PACKAGE, LOCALEDIR);
202
textdomain (PACKAGE);
203
c_stack_action (c_stack_die);
205
/* Parse command line options. */
207
while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
212
case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
217
ignore_initial[0] = parse_ignore_initial (&optarg, ':');
218
ignore_initial[1] = (*optarg++ == ':'
219
? parse_ignore_initial (&optarg, 0)
220
: ignore_initial[0]);
224
specify_comparison_type (type_all_diffs);
230
if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
231
try_help ("invalid --bytes value `%s'", optarg);
238
specify_comparison_type (type_status);
242
printf ("cmp %s\n%s\n\n%s\n\n%s\n",
243
version_string, copyright_string,
244
_(free_software_msgid), _(authorship_msgid));
258
try_help ("missing operand after `%s'", argv[argc - 1]);
260
file[0] = argv[optind++];
261
file[1] = optind < argc ? argv[optind++] : "-";
263
for (f = 0; f < 2 && optind < argc; f++)
265
char *arg = argv[optind++];
266
ignore_initial[f] = parse_ignore_initial (&arg, 0);
270
try_help ("extra operand `%s'", argv[optind]);
272
for (f = 0; f < 2; f++)
274
/* If file[1] is "-", treat it first; this avoids a misleading diagnostic
   if stdin is closed and opening file[0] yields file descriptor 0.  */
276
int f1 = f ^ (strcmp (file[1], "-") == 0);
278
/* Two files with the same name are identical.
   But wait until we open the file once, for proper diagnostics.  */
280
if (f && file_name_cmp (file[0], file[1]) == 0)
283
file_desc[f1] = (strcmp (file[f1], "-") == 0
285
: open (file[f1], O_RDONLY, 0));
286
if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
288
if (file_desc[f1] < 0 && comparison_type == type_status)
291
error (EXIT_TROUBLE, errno, "%s", file[f1]);
294
set_binary_mode (file_desc[f1], 1);
297
/* If the files are links to the same inode and have the same file position,
   they are identical.  */
300
if (0 < same_file (&stat_buf[0], &stat_buf[1])
301
&& same_file_attributes (&stat_buf[0], &stat_buf[1])
302
&& file_position (0) == file_position (1))
305
/* If output is redirected to the null device, we may assume `-s'. */
307
if (comparison_type != type_status)
309
struct stat outstat, nullstat;
311
if (fstat (STDOUT_FILENO, &outstat) == 0
312
&& stat (NULL_DEVICE, &nullstat) == 0
313
&& 0 < same_file (&outstat, &nullstat))
314
comparison_type = type_status;
317
/* If only a return code is needed,
   and if both input descriptors are associated with plain files,
   conclude that the files differ if they have different sizes
   and if more bytes will be compared than are in the smaller file.  */
322
if (comparison_type == type_status
323
&& S_ISREG (stat_buf[0].st_mode)
324
&& S_ISREG (stat_buf[1].st_mode))
326
off_t s0 = stat_buf[0].st_size - file_position (0);
327
off_t s1 = stat_buf[1].st_size - file_position (1);
332
if (s0 != s1 && MIN (s0, s1) < bytes)
336
/* Get the optimal block size of the files. */
338
buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
339
STAT_BLOCKSIZE (stat_buf[1]),
340
PTRDIFF_MAX - sizeof (word));
342
/* Allocate word-aligned buffers, with space for sentinels at the end. */
344
words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
345
buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
346
buffer[1] = buffer[0] + words_per_buffer;
348
exit_status = cmp ();
350
for (f = 0; f < 2; f++)
351
if (close (file_desc[f]) != 0)
352
error (EXIT_TROUBLE, errno, "%s", file[f]);
353
if (exit_status != 0 && comparison_type != type_status)
359
/* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
   using `buffer[0]' and `buffer[1]'.
   Return EXIT_SUCCESS if identical, EXIT_FAILURE if different;
   read errors are reported through error (EXIT_TROUBLE, ...).  */
367
off_t line_number = 1; /* Line number (1...) of difference. */
368
off_t byte_number = 1; /* Byte number (1...) of difference. */
369
uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */
370
size_t read0, read1; /* Number of bytes read from each file. */
371
size_t first_diff; /* Offset (0...) in buffers of 1st diff. */
372
size_t smaller; /* The lesser of `read0' and `read1'. */
373
word *buffer0 = buffer[0];
374
word *buffer1 = buffer[1];
375
char *buf0 = (char *) buffer0;
376
char *buf1 = (char *) buffer1;
377
int ret = EXIT_SUCCESS;
381
if (comparison_type == type_all_diffs)
383
off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
385
for (f = 0; f < 2; f++)
386
if (S_ISREG (stat_buf[f].st_mode))
388
off_t file_bytes = stat_buf[f].st_size - file_position (f);
389
if (file_bytes < byte_number_max)
390
byte_number_max = file_bytes;
393
for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
397
for (f = 0; f < 2; f++)
399
off_t ig = ignore_initial[f];
400
if (ig && file_position (f) == -1)
402
/* lseek failed; read and discard the ignored initial prefix. */
405
size_t bytes_to_read = MIN (ig, buf_size);
406
size_t r = block_read (file_desc[f], buf0, bytes_to_read);
407
if (r != bytes_to_read)
410
error (EXIT_TROUBLE, errno, "%s", file[f]);
421
size_t bytes_to_read = buf_size;
423
if (remaining != UINTMAX_MAX)
425
if (remaining < bytes_to_read)
426
bytes_to_read = remaining;
427
remaining -= bytes_to_read;
430
read0 = block_read (file_desc[0], buf0, bytes_to_read);
431
if (read0 == SIZE_MAX)
432
error (EXIT_TROUBLE, errno, "%s", file[0]);
433
read1 = block_read (file_desc[1], buf1, bytes_to_read);
434
if (read1 == SIZE_MAX)
435
error (EXIT_TROUBLE, errno, "%s", file[1]);
437
/* Insert sentinels for the block compare. */
439
buf0[read0] = ~buf1[read0];
440
buf1[read1] = ~buf0[read1];
442
/* If the line number should be written for differing files,
   compare the blocks and count the number of newlines
   at the same time.  */
445
first_diff = (comparison_type == type_first_diff
446
? block_compare_and_count (buffer0, buffer1, &line_number)
447
: block_compare (buffer0, buffer1));
449
byte_number += first_diff;
450
smaller = MIN (read0, read1);
452
if (first_diff < smaller)
454
switch (comparison_type)
456
case type_first_diff:
458
char byte_buf[INT_BUFSIZE_BOUND (off_t)];
459
char line_buf[INT_BUFSIZE_BOUND (off_t)];
460
char const *byte_num = offtostr (byte_number, byte_buf);
461
char const *line_num = offtostr (line_number, line_buf);
462
if (!opt_print_bytes)
464
/* See POSIX 1003.1-2001 for this format.  This
   message is used only in the POSIX locale, so it
   need not be translated.  */
467
static char const char_message[] =
468
"%s %s differ: char %s, line %s\n";
470
/* The POSIX rationale recommends using the word
   "byte" outside the POSIX locale.  Some gettext
   implementations translate even in the POSIX
   locale if certain other environment variables
   are set, so use "byte" if a translation is
   available, or if outside the POSIX locale.  */
476
static char const byte_msgid[] =
477
N_("%s %s differ: byte %s, line %s\n");
478
char const *byte_message = _(byte_msgid);
479
bool use_byte_message = (byte_message != byte_msgid
480
|| hard_locale_LC_MESSAGES);
482
printf ((use_byte_message
484
: "%s %s differ: char %s, line %s\n"),
485
file[0], file[1], byte_num, line_num);
489
unsigned char c0 = buf0[first_diff];
490
unsigned char c1 = buf1[first_diff];
495
printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
496
file[0], file[1], byte_num, line_num,
507
unsigned char c0 = buf0[first_diff];
508
unsigned char c1 = buf1[first_diff];
511
char byte_buf[INT_BUFSIZE_BOUND (off_t)];
512
char const *byte_num = offtostr (byte_number, byte_buf);
513
if (!opt_print_bytes)
515
/* See POSIX 1003.1-2001 for this format. */
516
printf ("%*s %3o %3o\n",
517
offset_width, byte_num, c0, c1);
525
printf ("%*s %3o %-4s %3o %s\n",
526
offset_width, byte_num, c0, s0, c1, s1);
532
while (first_diff < smaller);
540
if (comparison_type != type_status)
542
/* See POSIX 1003.1-2001 for this format. */
543
fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
549
while (read0 == buf_size);
554
/* Compare two blocks of memory P0 and P1 until they differ,
   and count the number of '\n' occurrences in the common
   part of P0 and P1.

   If the blocks are not guaranteed to be different, put sentinels at the ends
   of the blocks before calling this function.

   Return the offset of the first byte that differs.
   Increment *COUNT by the count of '\n' occurrences.  */
564
block_compare_and_count (word const *p0, word const *p1, off_t *count)
566
word l; /* One word from first buffer. */
567
word const *l0, *l1; /* Pointers into each buffer. */
568
char const *c0, *c1; /* Pointers for finding exact address. */
569
size_t cnt = 0; /* Number of '\n' occurrences. */
570
word nnnn; /* Newline, sizeof (word) times. */
574
for (i = 0; i < sizeof nnnn; i++)
575
nnnn = (nnnn << CHAR_BIT) | '\n';
577
/* Find the rough position of the first difference by reading words,
   not bytes.  */
580
for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++)
583
for (i = 0; i < sizeof l; i++)
585
cnt += ! (unsigned char) l;
590
/* Find the exact differing position (endianness independent). */
592
for (c0 = (char const *) l0, c1 = (char const *) l1;
598
return c0 - (char const *) p0;
601
/* Compare two blocks of memory P0 and P1 until they differ.
   If the blocks are not guaranteed to be different, put sentinels at the ends
   of the blocks before calling this function.

   Return the offset of the first byte that differs.  */
608
block_compare (word const *p0, word const *p1)
613
/* Find the rough position of the first difference by reading words,
   not bytes.  */
616
for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++)
619
/* Find the exact differing position (endianness independent). */
621
for (c0 = (char const *) l0, c1 = (char const *) l1;
626
return c0 - (char const *) p0;
629
/* Put into BUF the unsigned char C, making unprintable bytes
   visible by quoting like cat -t does.  */
633
sprintc (char *buf, unsigned char c)
659
/* Position file F to ignore_initial[F] bytes from its initial position,
   and yield its new position.  Don't try more than once.  */
663
file_position (int f)
665
static bool positioned[2];
666
static off_t position[2];
671
position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);