1
/* quotearg.c - quote arguments for output
   Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
18
/* Written by Paul Eggert <eggert@twinsun.com> */
35
/* Look MSGID up through gettext at the point of use.  */
#define _(msgid) gettext (msgid)
36
/* Expand to MSGID unchanged: the argument is passed through without
   any gettext call.  */
#define N_(msgid) msgid
40
/* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
48
/* Disable multibyte processing entirely.  Since MB_CUR_MAX is 1, the
   other macros are defined only for documentation and to satisfy C
   syntax.  */
53
/* Unibyte stand-in for mbrtowc: copy the byte at S into *PWC and yield
   1 if it is nonzero, 0 otherwise.  N and PS are ignored.  */
# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
54
/* Classify WC with isprint, after converting it to unsigned char.  */
# define iswprint(wc) isprint ((unsigned char) (wc))
58
/* When mbsinit is neither a macro nor reported available through
   HAVE_MBSINIT, treat every conversion state as initial.  */
#if !defined mbsinit && !HAVE_MBSINIT
59
# define mbsinit(ps) 1
66
/* When iswprint is neither a macro nor reported available through
   HAVE_ISWPRINT, treat every wide character as printable.  */
# if !defined iswprint && !HAVE_ISWPRINT
67
# define iswprint(wc) 1
72
/* Largest size_t value.  The functions in this file also use it as the
   "ARGSIZE not given, ARG is null-terminated" sentinel.  */
# define SIZE_MAX ((size_t) -1)
75
/* Number of bits in an int.  */
#define INT_BITS (sizeof (int) * CHAR_BIT)
77
/* Options controlling how an argument is quoted: a base quoting style
   plus a per-character set of "always quote this" flags.  */
struct quoting_options
79
/* Basic quoting style. */
80
enum quoting_style style;
82
/* Quote the characters indicated by this bit vector even if the
   quoting style would not normally require them to be quoted.
   There is one bit per unsigned char value, packed INT_BITS to an int:
   the flag for character UC is bit UC % INT_BITS of element
   UC / INT_BITS.  */
84
int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
87
/* Names of quoting styles.  Both the array and the strings its
   elements point to are const-qualified.  */
88
char const *const quoting_style_args[] =
100
/* Correspondences to quoting style names: the enum quoting_style value
   that goes with each name.  Presumably index-aligned with
   quoting_style_args -- confirm against that table when editing.  */
101
enum quoting_style const quoting_style_vals[] =
103
literal_quoting_style,
105
shell_always_quoting_style,
107
escape_quoting_style,
108
locale_quoting_style,
109
clocale_quoting_style
112
/* The default quoting options.  Functions in this file fall back to
   this object whenever they are handed a null options pointer.  It has
   static storage duration and no initializer, so it starts out
   zero-initialized.  */
113
static struct quoting_options default_quoting_options;
115
/* Allocate a new set of quoting options, with contents initially identical
   to O if O is not null, or to the default if O is null.
   It is the caller's responsibility to free the result.  */
118
struct quoting_options *
119
clone_quoting_options (struct quoting_options *o)
122
/* The xmalloc result is dereferenced below without a check; presumably
   xmalloc does not return on allocation failure -- confirm.  */
struct quoting_options *p = xmalloc (sizeof *p);
123
/* Structure assignment copies the style and the per-character flags
   in one step.  */
*p = *(o ? o : &default_quoting_options);
128
/* Get the value of O's quoting style.  If O is null, use the default.
   The options object is only read, never modified.  */
130
get_quoting_style (struct quoting_options *o)
132
return (o ? o : &default_quoting_options)->style;
135
/* In O (or in the default if O is null),
   set the value of the quoting style to S.
   Only the style member is written; the per-character quoting flags
   are left as they were.  */
138
set_quoting_style (struct quoting_options *o, enum quoting_style s)
140
(o ? o : &default_quoting_options)->style = s;
143
/* In O (or in the default if O is null),
144
set the value of the quoting options for character C to I.
145
Return the old value. Currently, the only values defined for I are
146
0 (the default) and 1 (which means to quote the character even if
147
it would not otherwise be quoted). */
149
set_char_quoting (struct quoting_options *o, char c, int i)
151
unsigned char uc = c;
152
int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
153
int shift = uc % INT_BITS;
154
int r = (*p >> shift) & 1;
155
*p ^= ((i & 1) ^ r) << shift;
159
/* MSGID approximates a quotation mark.  Return its translation if it
   has one; otherwise, return either it or "\"", depending on S.  */
162
gettext_quote (char const *msgid, enum quoting_style s)
164
char const *translation = _(msgid);
165
/* Pointer equality means the lookup handed MSGID itself back, which is
   taken as "no translation"; only clocale_quoting_style then takes the
   alternate branch.  */
if (translation == msgid && s == clocale_quoting_style)
170
/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
   argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
   non-quoting-style part of O to control quoting.
   Terminate the output with a null character, and return the written
   size of the output, not counting the terminating null.
   If BUFFERSIZE is too small to store the output string, return the
   value that would have been returned had BUFFERSIZE been large enough.
   If ARGSIZE is -1, use the string length of the argument for ARGSIZE.

   This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
   ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
   style specified by O, and O may not be null.  */
184
quotearg_buffer_restyled (char *buffer, size_t buffersize,
185
char const *arg, size_t argsize,
186
enum quoting_style quoting_style,
187
struct quoting_options const *o)
191
/* Quote text for the current style: it is compared against ARG inside
   the main loop and emitted after the quoted argument.  */
char const *quote_string = 0;
192
size_t quote_string_len = 0;
193
/* Set to 1 by the C, escape, locale and clocale styles below.  */
int backslash_escapes = 0;
194
/* True when the current locale's MB_CUR_MAX is 1.  */
int unibyte_locale = MB_CUR_MAX == 1;
199
if (len < buffersize) \
205
/* Per-style setup: choose the quote string and decide whether
   backslash escapes apply.  */
switch (quoting_style)
207
case c_quoting_style:
209
backslash_escapes = 1;
211
quote_string_len = 1;
214
case escape_quoting_style:
215
backslash_escapes = 1;
218
case locale_quoting_style:
219
case clocale_quoting_style:
221
/* Get translations for open and closing quotation marks.

   The message catalog should translate "`" to a left
   quotation mark suitable for the locale, and similarly for
   "'".  If the catalog has no translation,
   locale_quoting_style quotes `like this', and
   clocale_quoting_style quotes "like this".

   For example, an American English Unicode locale should
   translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
   should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
   MARK).  A British English Unicode locale should instead
   translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
   U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.  */
236
char const *left = gettext_quote (N_("`"), quoting_style);
237
char const *right = gettext_quote (N_("'"), quoting_style);
238
/* Emit the opening quote right away; the closing quote is kept in
   quote_string for the end of the output.  */
for (quote_string = left; *quote_string; quote_string++)
239
STORE (*quote_string);
240
backslash_escapes = 1;
241
quote_string = right;
242
quote_string_len = strlen (quote_string);
246
case shell_always_quoting_style:
249
quote_string_len = 1;
256
/* Walk ARG byte by byte: stop at the terminating null when ARGSIZE is
   SIZE_MAX, otherwise after exactly ARGSIZE bytes.  */
for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
261
/* With backslash escapes enabled, check whether the quote string
   itself occurs in ARG at position I.  */
if (backslash_escapes
263
&& i + quote_string_len <= argsize
264
&& memcmp (arg + i, quote_string, quote_string_len) == 0)
271
if (backslash_escapes)
281
switch (quoting_style)
283
case shell_quoting_style:
284
goto use_shell_always_quoting_style;
286
case c_quoting_style:
287
if (i + 2 < argsize && arg[i + 1] == '?')
291
case '(': case ')': case '-': case '/':
292
case '<': case '=': case '>':
293
/* Escape the second '?' in what would otherwise be
309
case '\a': esc = 'a'; goto c_escape;
310
case '\b': esc = 'b'; goto c_escape;
311
case '\f': esc = 'f'; goto c_escape;
312
case '\n': esc = 'n'; goto c_and_shell_escape;
313
case '\r': esc = 'r'; goto c_and_shell_escape;
314
case '\t': esc = 't'; goto c_and_shell_escape;
315
case '\v': esc = 'v'; goto c_escape;
316
case '\\': esc = c; goto c_and_shell_escape;
319
if (quoting_style == shell_quoting_style)
320
goto use_shell_always_quoting_style;
322
if (backslash_escapes)
334
case '!': /* special in bash */
335
case '"': case '$': case '&':
336
case '(': case ')': case '*': case ';':
337
case '<': case '>': case '[':
338
case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
340
/* A shell special character.  In theory, '$' and '`' could
   be the first bytes of multibyte characters, which means
   we should check them with mbrtowc, but in practice this
   doesn't happen so it's not worth worrying about.  */
344
if (quoting_style == shell_quoting_style)
345
goto use_shell_always_quoting_style;
349
switch (quoting_style)
351
case shell_quoting_style:
352
goto use_shell_always_quoting_style;
354
case shell_always_quoting_style:
365
case '%': case '+': case ',': case '-': case '.': case '/':
366
case '0': case '1': case '2': case '3': case '4': case '5':
367
case '6': case '7': case '8': case '9': case ':': case '=':
368
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
369
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
370
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
371
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
372
case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
373
case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
374
case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
375
case 'o': case 'p': case 'q': case 'r': case 's': case 't':
376
case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
378
/* These characters don't cause problems, no matter what the
   quoting style is.  They cannot start multibyte sequences.  */
383
/* If we have a multibyte sequence, copy it until we reach
   its end, find an error, or come back to the initial shift
   state.  For C-like styles, if the sequence has
   unprintable characters, escape the whole sequence, since
   we can't easily escape single characters within it.  */
389
/* Length of multibyte sequence found so far. */
397
printable = isprint (c);
402
/* Start the multibyte scan from a zeroed conversion state.  */
memset (&mbstate, 0, sizeof mbstate);
406
/* The scan passes a remaining-byte count to mbrtowc, so the SIZE_MAX
   sentinel is replaced by the real string length here.  */
if (argsize == SIZE_MAX)
407
argsize = strlen (arg);
412
size_t bytes = mbrtowc (&w, &arg[i + m],
413
argsize - (i + m), &mbstate);
416
/* (size_t) -1 and (size_t) -2 are mbrtowc's two failure returns:
   an invalid sequence and an incomplete one, respectively.  */
else if (bytes == (size_t) -1)
421
else if (bytes == (size_t) -2)
424
while (i + m < argsize && arg[i + m])
435
/* Keep scanning until the conversion state is initial again.  */
while (! mbsinit (&mbstate));
438
if (1 < m || (backslash_escapes && ! printable))
440
/* Output a multibyte sequence, or an escaped
   unprintable unibyte character.  */
446
if (backslash_escapes && ! printable)
449
/* Octal digits of the byte: the top two bits first, then the middle
   three bits.  */
STORE ('0' + (c >> 6));
450
STORE ('0' + ((c >> 3) & 7));
464
/* Test C's bit in the caller-supplied quote_these_too bit vector.
   NOTE(review): 1 << (c % INT_BITS) is evaluated in signed int, so a
   shift count of INT_BITS - 1 shifts into the sign bit.  */
if (! (backslash_escapes
465
&& o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
476
/* Append whatever remains of quote_string (the closing quote).  */
for (; *quote_string; quote_string++)
477
STORE (*quote_string);
479
if (len < buffersize)
483
/* Reached from shell_quoting_style via the gotos above: redo the whole
   argument from scratch in shell_always_quoting_style.  */
use_shell_always_quoting_style:
484
return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
485
shell_always_quoting_style, o);
488
/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
   argument ARG (of size ARGSIZE), using O to control quoting.
   If O is null, use the default.
   Terminate the output with a null character, and return the written
   size of the output, not counting the terminating null.
   If BUFFERSIZE is too small to store the output string, return the
   value that would have been returned had BUFFERSIZE been large enough.
   If ARGSIZE is -1, use the string length of the argument for ARGSIZE.  */
497
quotearg_buffer (char *buffer, size_t buffersize,
498
char const *arg, size_t argsize,
499
struct quoting_options const *o)
501
/* Resolve a null O to the defaults here, because
   quotearg_buffer_restyled requires non-null options.  */
struct quoting_options const *p = o ? o : &default_quoting_options;
503
size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
509
/* Use storage slot N to return a quoted version of argument ARG.
   ARG is of size ARGSIZE, but if that is -1, ARG is a null-terminated string.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative.  N is deliberately declared with type "int"
   to allow for future extensions (using negative values).  */
517
quotearg_n_options (int n, char const *arg, size_t argsize,
518
struct quoting_options const *options)
522
/* Preallocate a slot 0 buffer, so that the caller can always quote
   one small component of a "memory exhausted" message in slot 0.  */
524
static char slot0[256];
525
/* Number of entries currently in the slot vector.  */
static unsigned int nslots = 1;
532
/* The initial one-entry vector: slot 0 backed by the static slot0
   buffer, so no allocation is needed for it.  */
static struct slotvec slotvec0 = {sizeof slot0, slot0};
533
static struct slotvec *slotvec = &slotvec0;
540
unsigned int n1 = n0 + 1;
541
size_t s = n1 * sizeof *slotvec;
543
/* Overflow guard for S: only when the first test shows the
   multiplication could wrap is S divided back and compared with N1.  */
if (SIZE_MAX / UINT_MAX <= sizeof *slotvec
544
&& n1 != s / sizeof *slotvec)
547
/* First growth: switch from the static initial vector to a heap block
   before xrealloc resizes it.  */
if (slotvec == &slotvec0)
549
slotvec = xmalloc (sizeof *slotvec);
552
slotvec = xrealloc (slotvec, s);
553
/* Zero the newly added entries.  */
memset (slotvec + nslots, 0, (n1 - nslots) * sizeof *slotvec);
558
size_t size = slotvec[n].size;
559
char *val = slotvec[n].val;
560
/* First pass: quote into the slot's current buffer.  QSIZE is the
   length the output needs, whether or not it fit.  */
size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
564
/* Resize the slot to QSIZE bytes plus the terminating null and quote
   again.  The static slot0 buffer is never handed to xrealloc; a null
   pointer is passed in its place.  */
slotvec[n].size = size = qsize + 1;
565
slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
566
quotearg_buffer (val, size, arg, argsize, options);
575
/* Quote ARG into storage slot N using the default quoting options.
   SIZE_MAX is passed as the size, so ARG is treated as a
   null-terminated string.  */
quotearg_n (int n, char const *arg)
577
return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
581
/* Quote ARG into storage slot 0; shorthand for quotearg_n (0, ARG).  */
quotearg (char const *arg)
583
return quotearg_n (0, arg);
586
/* Return quoting options for STYLE, with no extra quoting.
   The options are returned by value, not through a pointer.  */
587
static struct quoting_options
588
quoting_options_from_style (enum quoting_style style)
590
struct quoting_options o;
592
/* Clear every per-character "quote this too" flag.  */
memset (o.quote_these_too, 0, sizeof o.quote_these_too);
597
/* Quote ARG into storage slot N using quoting style S with no extra
   per-character quoting.  SIZE_MAX is passed as the size, so ARG is
   treated as a null-terminated string.  */
quotearg_n_style (int n, enum quoting_style s, char const *arg)
599
/* Temporary options for this call only; just its address is passed on.  */
struct quoting_options const o = quoting_options_from_style (s);
600
return quotearg_n_options (n, arg, SIZE_MAX, &o);
604
/* Like quotearg_n_style, but ARG is given with an explicit size
   ARGSIZE, which is forwarded unchanged to quotearg_n_options.  */
quotearg_n_style_mem (int n, enum quoting_style s,
605
char const *arg, size_t argsize)
607
/* Temporary options for this call only; just its address is passed on.  */
struct quoting_options const o = quoting_options_from_style (s);
608
return quotearg_n_options (n, arg, argsize, &o);
612
/* Quote ARG into storage slot 0 using quoting style S; shorthand for
   quotearg_n_style (0, S, ARG).  */
quotearg_style (enum quoting_style s, char const *arg)
614
return quotearg_n_style (0, s, arg);
618
/* Quote null-terminated ARG into storage slot 0 using the default
   quoting options, additionally marking character CH as one to
   quote.  */
quotearg_char (char const *arg, char ch)
620
struct quoting_options options;
621
/* Work on a local copy so the shared defaults are not modified.  */
options = default_quoting_options;
622
set_char_quoting (&options, ch, 1);
623
return quotearg_n_options (0, arg, SIZE_MAX, &options);
627
/* Quote ARG as quotearg_char does, with ':' as the extra character
   to quote.  */
quotearg_colon (char const *arg)
629
return quotearg_char (arg, ':');