1
/* quotearg.c - quote arguments for output
3
Copyright (C) 1998-2002, 2004-2011 Free Software Foundation, Inc.
5
This program is free software: you can redistribute it and/or modify
6
it under the terms of the GNU General Public License as published by
7
the Free Software Foundation; either version 3 of the License, or
8
(at your option) any later version.
10
This program is distributed in the hope that it will be useful,
11
but WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
GNU General Public License for more details.
15
You should have received a copy of the GNU General Public License
16
along with this program. If not, see <http://www.gnu.org/licenses/>. */
18
/* Written by Paul Eggert <eggert@twinsun.com> */
36
/* Translate MSGID by passing it to gettext. */
#define _(msgid) gettext (msgid)
37
/* Expand to MSGID unchanged; no lookup happens at the point of use. */
#define N_(msgid) msgid
40
/* Largest value of type size_t.
   NOTE(review): the indented directive suggests this is a fallback inside
   a conditional that is not visible in this listing -- confirm. */
# define SIZE_MAX ((size_t) -1)
43
/* Number of bits in the storage of an int. */
#define INT_BITS (sizeof (int) * CHAR_BIT)
45
/* The full set of options that control how an argument is quoted.
   NOTE(review): other code in this file reads a member named flags from
   this structure; its declaration is not visible in this listing and
   presumably follows the Additional flags comment below -- confirm. */
struct quoting_options
47
/* Basic quoting style. */
48
enum quoting_style style;
50
/* Additional flags. Bitwise combination of enum quoting_flags. */
53
/* Quote the characters indicated by this bit vector even if the
   quoting style would not normally require them to be quoted.
   There is one bit per unsigned char value, packed INT_BITS bits to
   each array element. */
55
unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
57
/* The left quote for custom_quoting_style. */
58
char const *left_quote;
60
/* The right quote for custom_quoting_style. */
61
char const *right_quote;
64
/* Names of quoting styles.
   NOTE(review): the initializer list is not visible in this listing. */
65
char const *const quoting_style_args[] =
78
/* Correspondences to quoting style names: the enum quoting_style value
   for each name.
   NOTE(review): presumably entry I here matches entry I of
   quoting_style_args, and only some of the entries are visible in this
   listing -- confirm. */
79
enum quoting_style const quoting_style_vals[] =
81
literal_quoting_style,
83
shell_always_quoting_style,
85
c_maybe_quoting_style,
91
/* The default quoting options.  The accessors in this file fall back to
   this object whenever they are handed a null options pointer.  Being a
   file-scope static, it starts out zero-initialized. */
92
static struct quoting_options default_quoting_options;
94
/* Allocate a new set of quoting options, with contents initially identical
   to O if O is not null, or to the default if O is null.
   The caller is responsible for freeing the result. */
97
struct quoting_options *
98
clone_quoting_options (struct quoting_options *o)
101
/* Duplicate O, or the default options when O is null. */
struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
107
/* Return the quoting style stored in O.  If O is null, return the style
   stored in the default options instead. */
109
get_quoting_style (struct quoting_options *o)
111
return (o ? o : &default_quoting_options)->style;
114
/* In O (or in the default if O is null),
   set the value of the quoting style to S.  No other member of the
   options is touched. */
117
set_quoting_style (struct quoting_options *o, enum quoting_style s)
119
(o ? o : &default_quoting_options)->style = s;
122
/* In O (or in the default if O is null),
123
set the value of the quoting options for character C to I.
124
Return the old value. Currently, the only values defined for I are
125
0 (the default) and 1 (which means to quote the character even if
126
it would not otherwise be quoted). */
128
set_char_quoting (struct quoting_options *o, char c, int i)
130
unsigned char uc = c;
132
(o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
133
int shift = uc % INT_BITS;
134
int r = (*p >> shift) & 1;
135
*p ^= ((i & 1) ^ r) << shift;
139
/* In O (or in the default if O is null),
   set the value of the quoting options flag to I, which can be a
   bitwise combination of enum quoting_flags, or 0 for default
   behavior. Return the old value.
   NOTE(review): only the statement that redirects O to the default
   options is visible in this listing; presumably it is guarded by a
   null test on O -- confirm. */
144
set_quoting_flags (struct quoting_options *o, int i)
148
o = &default_quoting_options;
155
/* Switch O to custom_quoting_style and record LEFT_QUOTE and RIGHT_QUOTE
   as its opening and closing quote strings.  The pointers are stored as
   given; the strings are not copied.
   NOTE(review): the redirection of O to the default options is presumably
   guarded by a null test, and the action taken when either quote is null
   is not visible in this listing -- confirm both. */
set_custom_quoting (struct quoting_options *o,
156
char const *left_quote, char const *right_quote)
159
o = &default_quoting_options;
160
o->style = custom_quoting_style;
161
/* Both quote strings are required. */
if (!left_quote || !right_quote)
163
o->left_quote = left_quote;
164
o->right_quote = right_quote;
167
/* Return quoting options for STYLE, with no extra quoting: the
   quote_these_too bit vector of the result is all zero. */
168
static struct quoting_options
169
quoting_options_from_style (enum quoting_style style)
171
struct quoting_options o;
174
/* No character gets forced quoting. */
memset (o.quote_these_too, 0, sizeof o.quote_these_too);
178
/* MSGID approximates a quotation mark. Return its translation if it
   has one; otherwise, return either MSGID itself or a string holding a
   double-quote character, depending on S. */
181
gettext_quote (char const *msgid, enum quoting_style s)
183
char const *translation = _(msgid);
184
/* Getting MSGID itself back is taken to mean there is no translation;
   clocale_quoting_style is the style that gets the special fallback. */
if (translation == msgid && s == clocale_quoting_style)
189
/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
   argument ARG (of size ARGSIZE), using QUOTING_STYLE, FLAGS, and
   QUOTE_THESE_TOO to control quoting.
   LEFT_QUOTE and RIGHT_QUOTE are used as given only for
   custom_quoting_style; for the locale styles they are replaced by the
   results of gettext_quote.
   Terminate the output with a null character, and return the written
   size of the output, not counting the terminating null.
   If BUFFERSIZE is too small to store the output string, return the
   value that would have been returned had BUFFERSIZE been large enough.
   If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.

   This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
   ARGSIZE, O), except it breaks O into its component pieces and is
   not careful about errno.

   When outer quotes are being elided and a character is met that needs
   them after all, control jumps to force_outer_quoting_style, which
   calls this function again with QA_ELIDE_OUTER_QUOTES cleared and a
   null pointer in place of QUOTE_THESE_TOO.

   NOTE(review): the test on QUOTE_THESE_TOO near the end of the scan
   loop builds its mask as 1 << (c % INT_BITS) with a signed 1.  When
   the shift count is INT_BITS - 1 the result is not representable in
   int, which the C standard leaves undefined.  An unsigned 1, or
   shifting the array element right and masking with 1, would avoid
   this -- confirm against the declaration of c, which is not visible in
   this listing.

   NOTE(review): the right-quote check compares i + quote_string_len
   against ARGSIZE before calling memcmp.  When ARGSIZE is SIZE_MAX that
   bound never fails, so for a quote string longer than one byte memcmp
   may be asked to look past the terminating null of ARG -- confirm. */
203
quotearg_buffer_restyled (char *buffer, size_t buffersize,
204
char const *arg, size_t argsize,
205
enum quoting_style quoting_style, int flags,
206
unsigned int const *quote_these_too,
207
char const *left_quote,
208
char const *right_quote)
212
char const *quote_string = 0;
213
size_t quote_string_len = 0;
214
bool backslash_escapes = false;
215
bool unibyte_locale = MB_CUR_MAX == 1;
216
bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0;
221
if (len < buffersize) \
227
/* Per-style setup: decide whether backslash escapes are used, whether
   outer quotes may be elided, and which closing quote string applies. */
switch (quoting_style)
229
case c_maybe_quoting_style:
230
quoting_style = c_quoting_style;
231
elide_outer_quotes = true;
233
case c_quoting_style:
234
if (!elide_outer_quotes)
236
backslash_escapes = true;
238
quote_string_len = 1;
241
case escape_quoting_style:
242
backslash_escapes = true;
243
elide_outer_quotes = false;
246
case locale_quoting_style:
247
case clocale_quoting_style:
248
case custom_quoting_style:
250
if (quoting_style != custom_quoting_style)
253
Get translations for open and closing quotation marks.
255
The message catalog should translate "`" to a left
256
quotation mark suitable for the locale, and similarly for
257
"'". If the catalog has no translation,
258
locale_quoting_style quotes `like this', and
259
clocale_quoting_style quotes "like this".
261
For example, an American English Unicode locale should
262
translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
263
should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
264
MARK). A British English Unicode locale should instead
265
translate these to U+2018 (LEFT SINGLE QUOTATION MARK)
266
and U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
268
If you don't know what to put here, please see
269
<http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
270
and use glyphs suitable for your language. */
271
left_quote = gettext_quote (N_("`"), quoting_style);
272
right_quote = gettext_quote (N_("'"), quoting_style);
274
if (!elide_outer_quotes)
275
for (quote_string = left_quote; *quote_string; quote_string++)
276
STORE (*quote_string);
277
backslash_escapes = true;
278
quote_string = right_quote;
279
quote_string_len = strlen (quote_string);
283
case shell_quoting_style:
284
quoting_style = shell_always_quoting_style;
285
elide_outer_quotes = true;
287
case shell_always_quoting_style:
288
if (!elide_outer_quotes)
291
quote_string_len = 1;
294
case literal_quoting_style:
295
elide_outer_quotes = false;
302
for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
306
bool is_right_quote = false;
308
if (backslash_escapes
310
&& i + quote_string_len <= argsize
311
&& memcmp (arg + i, quote_string, quote_string_len) == 0)
313
if (elide_outer_quotes)
314
goto force_outer_quoting_style;
315
is_right_quote = true;
322
if (backslash_escapes)
324
if (elide_outer_quotes)
325
goto force_outer_quoting_style;
327
/* If quote_string were to begin with digits, we'd need to
328
test for the end of the arg as well. However, it's
329
hard to imagine any locale that would use digits in
330
quotes, and set_custom_quoting is documented not to
332
if (i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9')
338
/* We don't have to worry that this last '0' will be
339
backslash-escaped because, again, quote_string should
340
not start with it and because quote_these_too is
341
documented as not accepting it. */
343
else if (flags & QA_ELIDE_NULL_BYTES)
348
switch (quoting_style)
350
case shell_always_quoting_style:
351
if (elide_outer_quotes)
352
goto force_outer_quoting_style;
355
case c_quoting_style:
356
if ((flags & QA_SPLIT_TRIGRAPHS)
357
&& i + 2 < argsize && arg[i + 1] == '?')
361
case '(': case ')': case '-': case '/':
362
case '<': case '=': case '>':
363
/* Escape the second '?' in what would otherwise be
365
if (elide_outer_quotes)
366
goto force_outer_quoting_style;
385
case '\a': esc = 'a'; goto c_escape;
386
case '\b': esc = 'b'; goto c_escape;
387
case '\f': esc = 'f'; goto c_escape;
388
case '\n': esc = 'n'; goto c_and_shell_escape;
389
case '\r': esc = 'r'; goto c_and_shell_escape;
390
case '\t': esc = 't'; goto c_and_shell_escape;
391
case '\v': esc = 'v'; goto c_escape;
393
/* No need to escape the escape if we are trying to elide
394
outer quotes and nothing else is problematic. */
395
if (backslash_escapes && elide_outer_quotes && quote_string_len)
399
if (quoting_style == shell_always_quoting_style
400
&& elide_outer_quotes)
401
goto force_outer_quoting_style;
404
if (backslash_escapes)
411
case '{': case '}': /* sometimes special if isolated */
412
if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
420
case '!': /* special in bash */
421
case '"': case '$': case '&':
422
case '(': case ')': case '*': case ';':
424
case '=': /* sometimes special in 0th or (with "set -k") later args */
426
case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
428
/* A shell special character. In theory, '$' and '`' could
429
be the first bytes of multibyte characters, which means
430
we should check them with mbrtowc, but in practice this
431
doesn't happen so it's not worth worrying about. */
432
if (quoting_style == shell_always_quoting_style
433
&& elide_outer_quotes)
434
goto force_outer_quoting_style;
438
if (quoting_style == shell_always_quoting_style)
440
if (elide_outer_quotes)
441
goto force_outer_quoting_style;
448
case '%': case '+': case ',': case '-': case '.': case '/':
449
case '0': case '1': case '2': case '3': case '4': case '5':
450
case '6': case '7': case '8': case '9': case ':':
451
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
452
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
453
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
454
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
455
case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
456
case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
457
case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
458
case 'o': case 'p': case 'q': case 'r': case 's': case 't':
459
case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
460
/* These characters don't cause problems, no matter what the
461
quoting style is. They cannot start multibyte sequences.
462
A digit or a special letter would cause trouble if it
463
appeared at the beginning of quote_string because we'd then
464
escape by prepending a backslash. However, it's hard to
465
imagine any locale that would use digits or letters as
466
quotes, and set_custom_quoting is documented not to accept
467
them. Also, a digit or a special letter would cause
468
trouble if it appeared in quote_these_too, but that's also
469
documented as not accepting them. */
473
/* If we have a multibyte sequence, copy it until we reach
474
its end, find an error, or come back to the initial shift
475
state. For C-like styles, if the sequence has
476
unprintable characters, escape the whole sequence, since
477
we can't easily escape single characters within it. */
479
/* Length of multibyte sequence found so far. */
487
printable = isprint (c) != 0;
492
memset (&mbstate, 0, sizeof mbstate);
496
if (argsize == SIZE_MAX)
497
argsize = strlen (arg);
502
size_t bytes = mbrtowc (&w, &arg[i + m],
503
argsize - (i + m), &mbstate);
506
else if (bytes == (size_t) -1)
511
else if (bytes == (size_t) -2)
514
while (i + m < argsize && arg[i + m])
520
/* Work around a bug with older shells that "see" a '\'
521
that is really the 2nd byte of a multibyte character.
522
In practice the problem is limited to ASCII
523
chars >= '@' that are shell special chars. */
524
if ('[' == 0x5b && elide_outer_quotes
525
&& quoting_style == shell_always_quoting_style)
528
for (j = 1; j < bytes; j++)
529
switch (arg[i + m + j])
531
case '[': case '\\': case '^':
533
goto force_outer_quoting_style;
545
while (! mbsinit (&mbstate));
548
if (1 < m || (backslash_escapes && ! printable))
550
/* Output a multibyte sequence, or an escaped
551
unprintable unibyte character. */
556
if (backslash_escapes && ! printable)
558
if (elide_outer_quotes)
559
goto force_outer_quoting_style;
561
STORE ('0' + (c >> 6));
562
STORE ('0' + ((c >> 3) & 7));
565
else if (is_right_quote)
568
is_right_quote = false;
581
if (! ((backslash_escapes || elide_outer_quotes)
583
&& quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))
588
if (elide_outer_quotes)
589
goto force_outer_quoting_style;
596
if (len == 0 && quoting_style == shell_always_quoting_style
597
&& elide_outer_quotes)
598
goto force_outer_quoting_style;
600
if (quote_string && !elide_outer_quotes)
601
for (; *quote_string; quote_string++)
602
STORE (*quote_string);
604
if (len < buffersize)
608
force_outer_quoting_style:
609
/* Don't reuse quote_these_too, since the addition of outer quotes
610
sufficiently quotes the specified characters. */
611
return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
613
flags & ~QA_ELIDE_OUTER_QUOTES, NULL,
614
left_quote, right_quote);
617
/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
   argument ARG (of size ARGSIZE), using O to control quoting.
   If O is null, use the default.
   Terminate the output with a null character, and return the written
   size of the output, not counting the terminating null.
   If BUFFERSIZE is too small to store the output string, return the
   value that would have been returned had BUFFERSIZE been large enough.
   If ARGSIZE is SIZE_MAX, use the string length of the argument for
   ARGSIZE. */
627
quotearg_buffer (char *buffer, size_t buffersize,
628
char const *arg, size_t argsize,
629
struct quoting_options const *o)
631
/* Use the default options when O is null. */
struct quoting_options const *p = o ? o : &default_quoting_options;
633
/* Pass the individual option members on to quotearg_buffer_restyled. */
size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
634
p->style, p->flags, p->quote_these_too,
635
p->left_quote, p->right_quote);
640
/* Equivalent to quotearg_alloc_mem (ARG, ARGSIZE, NULL, O): no size is
   reported back to the caller. */
642
quotearg_alloc (char const *arg, size_t argsize,
643
struct quoting_options const *o)
645
return quotearg_alloc_mem (arg, argsize, NULL, o);
648
/* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
   allocated storage containing the quoted string, and store the
   resulting size into *SIZE, if non-NULL. The result can contain
   embedded null bytes only if ARGSIZE is not SIZE_MAX, SIZE is not
   NULL, and set_quoting_flags has not set the null byte elision
   flag. */
655
quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size,
656
struct quoting_options const *o)
658
/* Use the default options when O is null. */
struct quoting_options const *p = o ? o : &default_quoting_options;
660
/* Elide embedded null bytes if we can't return a size. */
661
int flags = p->flags | (size ? 0 : QA_ELIDE_NULL_BYTES);
662
/* First pass: a null buffer of size 0 stores nothing and only yields
   the length the quoted output needs. */
size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style,
663
flags, p->quote_these_too,
666
char *buf = xcharalloc (bufsize);
667
/* Second pass: produce the quoted text in the new buffer. */
quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags,
669
p->left_quote, p->right_quote);
676
/* A storage slot with size and pointer to a value.
   NOTE(review): the structure this comment describes is not visible in
   this listing. */
683
/* Preallocate a slot 0 buffer, so that the caller can always quote
   one small component of a memory exhausted message in slot 0. */
685
static char slot0[256];
686
/* Number of entries in the slot vector; initially just slot 0. */
static unsigned int nslots = 1;
687
/* The initial one-entry slot vector, describing slot0. */
static struct slotvec slotvec0 = {sizeof slot0, slot0};
688
/* The current slot vector; it starts out as the static slotvec0. */
static struct slotvec *slotvec = &slotvec0;
693
struct slotvec *sv = slotvec;
695
for (i = 1; i < nslots; i++)
697
if (sv[0].val != slot0)
700
slotvec0.size = sizeof slot0;
701
slotvec0.val = slot0;
711
/* Use storage slot N to return a quoted version of argument ARG.
   ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
   null-terminated string.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative. N is deliberately declared with type int
   to allow for future extensions (using negative values). */
720
quotearg_n_options (int n, char const *arg, size_t argsize,
721
struct quoting_options const *options)
726
struct slotvec *sv = slotvec;
733
/* FIXME: technically, the type of n1 should be unsigned int,
   but that evokes an unsuppressible warning from gcc-4.0.1 and
   older. If gcc ever provides an option to suppress that warning,
   revert to the original type, so that the test in xalloc_oversized
   is once again performed only at compile time. */
739
/* True while the slot vector is still the static slotvec0. */
bool preallocated = (sv == &slotvec0);
741
if (xalloc_oversized (n1, sizeof *sv))
744
/* The static vector must not be handed to xrealloc, so a null pointer is
   passed in that case and a fresh vector is allocated. */
slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
747
/* Zero the entries beyond the old slot count. */
memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
752
size_t size = sv[n].size;
753
char *val = sv[n].val;
754
/* Elide embedded null bytes since we don't return a size. */
755
int flags = options->flags | QA_ELIDE_NULL_BYTES;
756
/* First attempt: quote into whatever buffer slot N currently has. */
size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize,
757
options->style, flags,
758
options->quote_these_too,
760
options->right_quote);
764
/* Grow slot N to hold the quoted text plus its terminating null and
   quote again.  NOTE(review): presumably this happens only when the
   first attempt did not fit; the test is not visible here -- confirm. */
sv[n].size = size = qsize + 1;
767
sv[n].val = val = xcharalloc (size);
768
quotearg_buffer_restyled (val, size, arg, argsize, options->style,
769
flags, options->quote_these_too,
771
options->right_quote);
780
/* Quote the null-terminated string ARG into storage slot N, using the
   default quoting options. */
quotearg_n (int n, char const *arg)
782
return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
786
/* Quote the ARGSIZE bytes at ARG into storage slot N, using the default
   quoting options. */
quotearg_n_mem (int n, char const *arg, size_t argsize)
788
return quotearg_n_options (n, arg, argsize, &default_quoting_options);
792
/* Like quotearg_n, using storage slot 0. */
quotearg (char const *arg)
794
return quotearg_n (0, arg);
798
/* Like quotearg_n_mem, using storage slot 0. */
quotearg_mem (char const *arg, size_t argsize)
800
return quotearg_n_mem (0, arg, argsize);
804
/* Quote the null-terminated string ARG into storage slot N, using
   quoting style S with no extra quoting. */
quotearg_n_style (int n, enum quoting_style s, char const *arg)
806
struct quoting_options const o = quoting_options_from_style (s);
807
return quotearg_n_options (n, arg, SIZE_MAX, &o);
811
/* Quote the ARGSIZE bytes at ARG into storage slot N, using quoting
   style S with no extra quoting. */
quotearg_n_style_mem (int n, enum quoting_style s,
812
char const *arg, size_t argsize)
814
struct quoting_options const o = quoting_options_from_style (s);
815
return quotearg_n_options (n, arg, argsize, &o);
819
/* Like quotearg_n_style, using storage slot 0. */
quotearg_style (enum quoting_style s, char const *arg)
821
return quotearg_n_style (0, s, arg);
825
/* Like quotearg_n_style_mem, using storage slot 0. */
quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize)
827
return quotearg_n_style_mem (0, s, arg, argsize);
831
/* Quote the ARGSIZE bytes at ARG into storage slot 0, using a private
   copy of the default quoting options in which the character CH is
   additionally marked as always to be quoted.  The default options
   themselves are not modified. */
quotearg_char_mem (char const *arg, size_t argsize, char ch)
833
struct quoting_options options;
834
options = default_quoting_options;
835
set_char_quoting (&options, ch, 1);
836
return quotearg_n_options (0, arg, argsize, &options);
840
/* Like quotearg_char_mem, for a null-terminated string ARG. */
quotearg_char (char const *arg, char ch)
842
return quotearg_char_mem (arg, SIZE_MAX, ch);
846
/* Like quotearg_char, with the colon as the character that is always
   quoted. */
quotearg_colon (char const *arg)
848
return quotearg_char (arg, ':');
852
/* Like quotearg_char_mem, with the colon as the character that is always
   quoted. */
quotearg_colon_mem (char const *arg, size_t argsize)
854
return quotearg_char_mem (arg, argsize, ':');
858
/* Quote ARG into storage slot N with LEFT_QUOTE and RIGHT_QUOTE as the
   custom quote strings, by forwarding to quotearg_n_custom_mem.
   NOTE(review): the final argument of the forwarded call is not visible
   in this listing; presumably it is SIZE_MAX, as in the other wrappers
   for null-terminated strings -- confirm. */
quotearg_n_custom (int n, char const *left_quote,
859
char const *right_quote, char const *arg)
861
return quotearg_n_custom_mem (n, left_quote, right_quote, arg,
866
/* Quote the ARGSIZE bytes at ARG into storage slot N, using a private
   copy of the default quoting options switched to custom quoting with
   LEFT_QUOTE and RIGHT_QUOTE.  The default options themselves are not
   modified. */
quotearg_n_custom_mem (int n, char const *left_quote,
867
char const *right_quote,
868
char const *arg, size_t argsize)
870
struct quoting_options o = default_quoting_options;
871
set_custom_quoting (&o, left_quote, right_quote);
872
return quotearg_n_options (n, arg, argsize, &o);
876
/* Like quotearg_n_custom, using storage slot 0.
   NOTE(review): the end of the parameter list, which declares ARG, is
   not visible in this listing. */
quotearg_custom (char const *left_quote, char const *right_quote,
879
return quotearg_n_custom (0, left_quote, right_quote, arg);
883
quotearg_custom_mem (char const *left_quote, char const *right_quote,
884
char const *arg, size_t argsize)
886
return quotearg_n_custom_mem (0, left_quote, right_quote, arg,