1
/* quotearg.c - quote arguments for output
3
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2008,
4
2009, 2010 Free Software Foundation, Inc.
6
This program is free software: you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 3 of the License, or
9
(at your option) any later version.
11
This program is distributed in the hope that it will be useful,
12
but WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
GNU General Public License for more details.
16
You should have received a copy of the GNU General Public License
17
along with this program. If not, see <http://www.gnu.org/licenses/>. */
19
/* Written by Paul Eggert <eggert@twinsun.com> */
37
#define _(msgid) gettext (msgid)
38
#define N_(msgid) msgid
41
# define SIZE_MAX ((size_t) -1)
44
#define INT_BITS (sizeof (int) * CHAR_BIT)
46
struct quoting_options
48
/* Basic quoting style. */
49
enum quoting_style style;
51
/* Additional flags. Bitwise combination of enum quoting_flags. */
54
/* Quote the characters indicated by this bit vector even if the
55
quoting style would not normally require them to be quoted. */
56
unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
58
/* The left quote for custom_quoting_style. */
59
char const *left_quote;
61
/* The right quote for custom_quoting_style. */
62
char const *right_quote;
65
/* Names of quoting styles. */
66
char const *const quoting_style_args[] =
79
/* Correspondences to quoting style names. */
80
enum quoting_style const quoting_style_vals[] =
82
literal_quoting_style,
84
shell_always_quoting_style,
86
c_maybe_quoting_style,
92
/* The default quoting options. */
93
static struct quoting_options default_quoting_options;
95
/* Allocate a new set of quoting options, with contents initially identical
96
to O if O is not null, or to the default if O is null.
97
It is the caller's responsibility to free the result. */
98
struct quoting_options *
99
clone_quoting_options (struct quoting_options *o)
102
struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
108
/* Get the value of O's quoting style. If O is null, use the default. */
110
get_quoting_style (struct quoting_options *o)
112
return (o ? o : &default_quoting_options)->style;
115
/* In O (or in the default if O is null),
116
set the value of the quoting style to S. */
118
set_quoting_style (struct quoting_options *o, enum quoting_style s)
120
(o ? o : &default_quoting_options)->style = s;
123
/* In O (or in the default if O is null),
124
set the value of the quoting options for character C to I.
125
Return the old value. Currently, the only values defined for I are
126
0 (the default) and 1 (which means to quote the character even if
127
it would not otherwise be quoted). */
129
set_char_quoting (struct quoting_options *o, char c, int i)
131
unsigned char uc = c;
133
(o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
134
int shift = uc % INT_BITS;
135
int r = (*p >> shift) & 1;
136
*p ^= ((i & 1) ^ r) << shift;
140
/* In O (or in the default if O is null),
141
set the value of the quoting options flag to I, which can be a
142
bitwise combination of enum quoting_flags, or 0 for default
143
behavior. Return the old value. */
145
set_quoting_flags (struct quoting_options *o, int i)
149
o = &default_quoting_options;
156
set_custom_quoting (struct quoting_options *o,
157
char const *left_quote, char const *right_quote)
160
o = &default_quoting_options;
161
o->style = custom_quoting_style;
162
if (!left_quote || !right_quote)
164
o->left_quote = left_quote;
165
o->right_quote = right_quote;
168
/* Return quoting options for STYLE, with no extra quoting. */
169
static struct quoting_options
170
quoting_options_from_style (enum quoting_style style)
172
struct quoting_options o;
175
memset (o.quote_these_too, 0, sizeof o.quote_these_too);
179
/* MSGID approximates a quotation mark. Return its translation if it
180
has one; otherwise, return "\"" if S is clocale_quoting_style, or
MSGID itself for any other style. */
182
gettext_quote (char const *msgid, enum quoting_style s)
184
char const *translation = _(msgid);
185
if (translation == msgid && s == clocale_quoting_style)
190
/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
191
argument ARG (of size ARGSIZE), using QUOTING_STYLE, FLAGS, and
192
QUOTE_THESE_TOO to control quoting. LEFT_QUOTE and RIGHT_QUOTE are
the quote strings used when QUOTING_STYLE is custom_quoting_style.
193
Terminate the output with a null character, and return the written
194
size of the output, not counting the terminating null.
195
If BUFFERSIZE is too small to store the output string, return the
196
value that would have been returned had BUFFERSIZE been large enough.
197
If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
199
This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
200
ARGSIZE, O), except it breaks O into its component pieces and is
201
not careful about errno. */
204
quotearg_buffer_restyled (char *buffer, size_t buffersize,
205
char const *arg, size_t argsize,
206
enum quoting_style quoting_style, int flags,
207
unsigned int const *quote_these_too,
208
char const *left_quote,
209
char const *right_quote)
213
char const *quote_string = 0;
214
size_t quote_string_len = 0;
215
bool backslash_escapes = false;
216
bool unibyte_locale = MB_CUR_MAX == 1;
217
bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0;
222
if (len < buffersize) \
228
switch (quoting_style)
230
case c_maybe_quoting_style:
231
quoting_style = c_quoting_style;
232
elide_outer_quotes = true;
234
case c_quoting_style:
235
if (!elide_outer_quotes)
237
backslash_escapes = true;
239
quote_string_len = 1;
242
case escape_quoting_style:
243
backslash_escapes = true;
244
elide_outer_quotes = false;
247
case locale_quoting_style:
248
case clocale_quoting_style:
249
case custom_quoting_style:
251
if (quoting_style != custom_quoting_style)
254
/* Get translations for opening and closing quotation marks.
256
The message catalog should translate "`" to a left
257
quotation mark suitable for the locale, and similarly for
258
"'". If the catalog has no translation,
259
locale_quoting_style quotes `like this', and
260
clocale_quoting_style quotes "like this".
262
For example, an American English Unicode locale should
263
translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
264
should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
265
MARK). A British English Unicode locale should instead
266
translate these to U+2018 (LEFT SINGLE QUOTATION MARK)
267
and U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
269
If you don't know what to put here, please see
270
<http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
271
and use glyphs suitable for your language. */
272
left_quote = gettext_quote (N_("`"), quoting_style);
273
right_quote = gettext_quote (N_("'"), quoting_style);
275
if (!elide_outer_quotes)
276
for (quote_string = left_quote; *quote_string; quote_string++)
277
STORE (*quote_string);
278
backslash_escapes = true;
279
quote_string = right_quote;
280
quote_string_len = strlen (quote_string);
284
case shell_quoting_style:
285
quoting_style = shell_always_quoting_style;
286
elide_outer_quotes = true;
288
case shell_always_quoting_style:
289
if (!elide_outer_quotes)
292
quote_string_len = 1;
295
case literal_quoting_style:
296
elide_outer_quotes = false;
303
for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
307
bool is_right_quote = false;
309
if (backslash_escapes
311
&& i + quote_string_len <= argsize
312
&& memcmp (arg + i, quote_string, quote_string_len) == 0)
314
if (elide_outer_quotes)
315
goto force_outer_quoting_style;
316
is_right_quote = true;
323
if (backslash_escapes)
325
if (elide_outer_quotes)
326
goto force_outer_quoting_style;
328
/* If quote_string were to begin with digits, we'd need to
329
test for the end of the arg as well. However, it's
330
hard to imagine any locale that would use digits in
331
quotes, and set_custom_quoting is documented not to accept them. */
333
if (i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9')
339
/* We don't have to worry that this last '0' will be
340
backslash-escaped because, again, quote_string should
341
not start with it and because quote_these_too is
342
documented as not accepting it. */
344
else if (flags & QA_ELIDE_NULL_BYTES)
349
switch (quoting_style)
351
case shell_always_quoting_style:
352
if (elide_outer_quotes)
353
goto force_outer_quoting_style;
356
case c_quoting_style:
357
if ((flags & QA_SPLIT_TRIGRAPHS)
358
&& i + 2 < argsize && arg[i + 1] == '?')
362
case '(': case ')': case '-': case '/':
363
case '<': case '=': case '>':
364
/* Escape the second '?' in what would otherwise be a trigraph. */
366
if (elide_outer_quotes)
367
goto force_outer_quoting_style;
386
case '\a': esc = 'a'; goto c_escape;
387
case '\b': esc = 'b'; goto c_escape;
388
case '\f': esc = 'f'; goto c_escape;
389
case '\n': esc = 'n'; goto c_and_shell_escape;
390
case '\r': esc = 'r'; goto c_and_shell_escape;
391
case '\t': esc = 't'; goto c_and_shell_escape;
392
case '\v': esc = 'v'; goto c_escape;
394
/* No need to escape the escape if we are trying to elide
395
outer quotes and nothing else is problematic. */
396
if (backslash_escapes && elide_outer_quotes && quote_string_len)
400
if (quoting_style == shell_always_quoting_style
401
&& elide_outer_quotes)
402
goto force_outer_quoting_style;
405
if (backslash_escapes)
412
case '{': case '}': /* sometimes special if isolated */
413
if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
421
case '!': /* special in bash */
422
case '"': case '$': case '&':
423
case '(': case ')': case '*': case ';':
425
case '=': /* sometimes special in 0th or (with "set -k") later args */
427
case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
429
/* A shell special character. In theory, '$' and '`' could
430
be the first bytes of multibyte characters, which means
431
we should check them with mbrtowc, but in practice this
432
doesn't happen so it's not worth worrying about. */
433
if (quoting_style == shell_always_quoting_style
434
&& elide_outer_quotes)
435
goto force_outer_quoting_style;
439
if (quoting_style == shell_always_quoting_style)
441
if (elide_outer_quotes)
442
goto force_outer_quoting_style;
449
case '%': case '+': case ',': case '-': case '.': case '/':
450
case '0': case '1': case '2': case '3': case '4': case '5':
451
case '6': case '7': case '8': case '9': case ':':
452
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
453
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
454
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
455
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
456
case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
457
case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
458
case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
459
case 'o': case 'p': case 'q': case 'r': case 's': case 't':
460
case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
461
/* These characters don't cause problems, no matter what the
462
quoting style is. They cannot start multibyte sequences.
463
A digit or a special letter would cause trouble if it
464
appeared at the beginning of quote_string because we'd then
465
escape by prepending a backslash. However, it's hard to
466
imagine any locale that would use digits or letters as
467
quotes, and set_custom_quoting is documented not to accept
468
them. Also, a digit or a special letter would cause
469
trouble if it appeared in quote_these_too, but that's also
470
documented as not accepting them. */
474
/* If we have a multibyte sequence, copy it until we reach
475
its end, find an error, or come back to the initial shift
476
state. For C-like styles, if the sequence has
477
unprintable characters, escape the whole sequence, since
478
we can't easily escape single characters within it. */
480
/* Length of multibyte sequence found so far. */
488
printable = isprint (c) != 0;
493
memset (&mbstate, 0, sizeof mbstate);
497
if (argsize == SIZE_MAX)
498
argsize = strlen (arg);
503
size_t bytes = mbrtowc (&w, &arg[i + m],
504
argsize - (i + m), &mbstate);
507
else if (bytes == (size_t) -1)
512
else if (bytes == (size_t) -2)
515
while (i + m < argsize && arg[i + m])
521
/* Work around a bug with older shells that "see" a '\'
522
that is really the 2nd byte of a multibyte character.
523
In practice the problem is limited to ASCII
524
chars >= '@' that are shell special chars. */
525
if ('[' == 0x5b && elide_outer_quotes
526
&& quoting_style == shell_always_quoting_style)
529
for (j = 1; j < bytes; j++)
530
switch (arg[i + m + j])
532
case '[': case '\\': case '^':
534
goto force_outer_quoting_style;
546
while (! mbsinit (&mbstate));
549
if (1 < m || (backslash_escapes && ! printable))
551
/* Output a multibyte sequence, or an escaped
552
unprintable unibyte character. */
557
if (backslash_escapes && ! printable)
559
if (elide_outer_quotes)
560
goto force_outer_quoting_style;
562
STORE ('0' + (c >> 6));
563
STORE ('0' + ((c >> 3) & 7));
566
else if (is_right_quote)
569
is_right_quote = false;
582
if (! ((backslash_escapes || elide_outer_quotes)
584
&& quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))
589
if (elide_outer_quotes)
590
goto force_outer_quoting_style;
597
if (len == 0 && quoting_style == shell_always_quoting_style
598
&& elide_outer_quotes)
599
goto force_outer_quoting_style;
601
if (quote_string && !elide_outer_quotes)
602
for (; *quote_string; quote_string++)
603
STORE (*quote_string);
605
if (len < buffersize)
609
force_outer_quoting_style:
610
/* Don't reuse quote_these_too, since the addition of outer quotes
611
sufficiently quotes the specified characters. */
612
return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
614
flags & ~QA_ELIDE_OUTER_QUOTES, NULL,
615
left_quote, right_quote);
618
/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
619
argument ARG (of size ARGSIZE), using O to control quoting.
620
If O is null, use the default.
621
Terminate the output with a null character, and return the written
622
size of the output, not counting the terminating null.
623
If BUFFERSIZE is too small to store the output string, return the
624
value that would have been returned had BUFFERSIZE been large enough.
625
If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. */
628
quotearg_buffer (char *buffer, size_t buffersize,
629
char const *arg, size_t argsize,
630
struct quoting_options const *o)
632
struct quoting_options const *p = o ? o : &default_quoting_options;
634
size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
635
p->style, p->flags, p->quote_these_too,
636
p->left_quote, p->right_quote);
641
/* Equivalent to quotearg_alloc_mem (ARG, ARGSIZE, NULL, O). */
643
quotearg_alloc (char const *arg, size_t argsize,
644
struct quoting_options const *o)
646
return quotearg_alloc_mem (arg, argsize, NULL, o);
649
/* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
650
allocated storage containing the quoted string, and store the
651
resulting size into *SIZE, if non-NULL. The result can contain
652
embedded null bytes only if ARGSIZE is not SIZE_MAX, SIZE is not
653
NULL, and set_quoting_flags has not set the null byte elision flag. */
656
quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size,
657
struct quoting_options const *o)
659
struct quoting_options const *p = o ? o : &default_quoting_options;
661
/* Elide embedded null bytes if we can't return a size. */
662
int flags = p->flags | (size ? 0 : QA_ELIDE_NULL_BYTES);
663
size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style,
664
flags, p->quote_these_too,
667
char *buf = xcharalloc (bufsize);
668
quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags,
670
p->left_quote, p->right_quote);
677
/* A storage slot with size and pointer to a value. */
684
/* Preallocate a slot 0 buffer, so that the caller can always quote
685
one small component of a "memory exhausted" message in slot 0. */
686
static char slot0[256];
687
static unsigned int nslots = 1;
688
static struct slotvec slotvec0 = {sizeof slot0, slot0};
689
static struct slotvec *slotvec = &slotvec0;
694
struct slotvec *sv = slotvec;
696
for (i = 1; i < nslots; i++)
698
if (sv[0].val != slot0)
701
slotvec0.size = sizeof slot0;
702
slotvec0.val = slot0;
712
/* Use storage slot N to return a quoted version of argument ARG.
713
ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
714
null-terminated string.
715
OPTIONS specifies the quoting options.
716
The returned value points to static storage that can be
717
reused by the next call to this function with the same value of N.
718
N must be nonnegative. N is deliberately declared with type "int"
719
to allow for future extensions (using negative values). */
721
quotearg_n_options (int n, char const *arg, size_t argsize,
722
struct quoting_options const *options)
727
struct slotvec *sv = slotvec;
734
/* FIXME: technically, the type of n1 should be `unsigned int',
735
but that evokes an unsuppressible warning from gcc-4.0.1 and
736
older. If gcc ever provides an option to suppress that warning,
737
revert to the original type, so that the test in xalloc_oversized
738
is once again performed only at compile time. */
740
bool preallocated = (sv == &slotvec0);
742
if (xalloc_oversized (n1, sizeof *sv))
745
slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
748
memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
753
size_t size = sv[n].size;
754
char *val = sv[n].val;
755
/* Elide embedded null bytes since we don't return a size. */
756
int flags = options->flags | QA_ELIDE_NULL_BYTES;
757
size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize,
758
options->style, flags,
759
options->quote_these_too,
761
options->right_quote);
765
sv[n].size = size = qsize + 1;
768
sv[n].val = val = xcharalloc (size);
769
quotearg_buffer_restyled (val, size, arg, argsize, options->style,
770
flags, options->quote_these_too,
772
options->right_quote);
781
quotearg_n (int n, char const *arg)
783
return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
787
quotearg_n_mem (int n, char const *arg, size_t argsize)
789
return quotearg_n_options (n, arg, argsize, &default_quoting_options);
793
quotearg (char const *arg)
795
return quotearg_n (0, arg);
799
quotearg_mem (char const *arg, size_t argsize)
801
return quotearg_n_mem (0, arg, argsize);
805
quotearg_n_style (int n, enum quoting_style s, char const *arg)
807
struct quoting_options const o = quoting_options_from_style (s);
808
return quotearg_n_options (n, arg, SIZE_MAX, &o);
812
quotearg_n_style_mem (int n, enum quoting_style s,
813
char const *arg, size_t argsize)
815
struct quoting_options const o = quoting_options_from_style (s);
816
return quotearg_n_options (n, arg, argsize, &o);
820
quotearg_style (enum quoting_style s, char const *arg)
822
return quotearg_n_style (0, s, arg);
826
quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize)
828
return quotearg_n_style_mem (0, s, arg, argsize);
832
quotearg_char_mem (char const *arg, size_t argsize, char ch)
834
struct quoting_options options;
835
options = default_quoting_options;
836
set_char_quoting (&options, ch, 1);
837
return quotearg_n_options (0, arg, argsize, &options);
841
quotearg_char (char const *arg, char ch)
843
return quotearg_char_mem (arg, SIZE_MAX, ch);
847
quotearg_colon (char const *arg)
849
return quotearg_char (arg, ':');
853
quotearg_colon_mem (char const *arg, size_t argsize)
855
return quotearg_char_mem (arg, argsize, ':');
859
quotearg_n_custom (int n, char const *left_quote,
860
char const *right_quote, char const *arg)
862
return quotearg_n_custom_mem (n, left_quote, right_quote, arg,
867
quotearg_n_custom_mem (int n, char const *left_quote,
868
char const *right_quote,
869
char const *arg, size_t argsize)
871
struct quoting_options o = default_quoting_options;
872
set_custom_quoting (&o, left_quote, right_quote);
873
return quotearg_n_options (n, arg, argsize, &o);
877
quotearg_custom (char const *left_quote, char const *right_quote,
880
return quotearg_n_custom (0, left_quote, right_quote, arg);
884
quotearg_custom_mem (char const *left_quote, char const *right_quote,
885
char const *arg, size_t argsize)
887
return quotearg_n_custom_mem (0, left_quote, right_quote, arg,