1
/* __gmp_doscan -- formatted input internals.
3
THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
4
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
5
FUTURE GNU MP RELEASES.
7
Copyright 2001 Free Software Foundation, Inc.
9
This file is part of the GNU MP Library.
11
The GNU MP Library is free software; you can redistribute it and/or modify
12
it under the terms of the GNU Lesser General Public License as published by
13
the Free Software Foundation; either version 2.1 of the License, or (at your
14
option) any later version.
16
The GNU MP Library is distributed in the hope that it will be useful, but
17
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19
License for more details.
21
You should have received a copy of the GNU Lesser General Public License
22
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
23
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
24
MA 02111-1307, USA. */
35
#include <stddef.h> /* for ptrdiff_t */
40
#include <locale.h> /* for localeconv */
44
#include <stdint.h> /* for intmax_t */
48
#include <sys/types.h> /* for quad_t */
55
/* Change this to "#define TRACE(x) x" for some traces. */
59
/* It's necessary to parse up the string to recognise the GMP extra types F,
60
Q and Z. Other types and conversions are passed across to the standard
61
sscanf or fscanf via funs->scan, for ease of implementation. This is
62
essential in the case of something like glibc %p where the pointer format
63
isn't actually documented.
65
Because funs->scan doesn't get the whole input it can't put the right
66
values in for %n, so that's handled in __gmp_doscan. Neither sscanf nor
67
fscanf directly indicates how many characters were read, so an extra %n is
68
appended to each run for that. For fscanf this merely supports our %n
69
output, but for sscanf it lets funs->step move us along the input string.
71
Whitespace and literal matches in the format string, including %%, are
72
handled directly within __gmp_doscan. This is reasonably efficient, and
73
avoids some suspicious behaviour observed in various system libc's.
74
GLIBC 2.2.4 for instance returns 0 on sscanf(" "," x") or on sscanf(" ",
75
" x%d",&n), whereas we think they should return EOF, since end-of-string
76
is reached when a match of "x" is required.
78
For standard % conversions, funs->scan is called once for each
79
conversion. If we had vfscanf and vsscanf and could rely on their fixed
80
text matching behaviour then we could call them with multiple consecutive
81
standard conversions. But plain fscanf and sscanf work fine, and parsing
82
one field at a time shouldn't be too much of a slowdown.
84
gmpscan reads a gmp type. It's only used from one place, but is a
85
separate subroutine to avoid a big chunk of complicated code in the
86
middle of __gmp_doscan. Within gmpscan a couple of loopbacks make it
87
possible to share code for parsing integers, rationals and floats.
89
In gmpscan normally one char of lookahead is maintained, but when width
90
is reached that stops, on the principle that an fgetc/ungetc of a char
91
past where we're told to stop would be undesirable. "chars" is how many
92
characters have been read so far, including the current c. When
93
chars==width and another character is desired then a jump is done to the
94
"convert" stage. c is invalid and mustn't be unget'ed in this case;
95
chars is set to width+1 to indicate that.
97
gmpscan normally returns the number of characters read. -1 means an
98
invalid field, like a "-" or "+" alone. -2 means EOF reached before any
99
matching characters were read.
101
Consideration was given to using separate code for gmp_fscanf and
102
gmp_sscanf. The sscanf case could zip across a string making literal
103
matches or recognising digits in gmpscan, rather than making a function
104
call funs->get per character. The fscanf could use getc rather than fgetc
105
too, which might help those systems where getc is a macro or otherwise
106
inlined. But none of this scanning and converting will be particularly
107
fast, so the two are done together to keep it a bit simpler for now.
111
A way to read the GLIBC printf %a format that we support in gmp_printf
112
would be good. That would probably be good for plain GLIBC scanf too, so
113
perhaps we can simply follow its lead if it gets such a feature in the
117
struct gmp_doscan_params_t {
127
ASSERT (chars <= width); \
131
(c) = (*funs->get) (data); \
134
/* store into "s", extending if necessary */
137
ASSERT (s_upto <= s_alloc); \
138
if (s_upto >= s_alloc) \
140
size_t s_alloc_new = s_alloc + S_ALLOC_STEP; \
141
s = (*__gmp_reallocate_func) (s, s_alloc, s_alloc_new); \
142
s_alloc = s_alloc_new; \
147
#define S_ALLOC_STEP 512
150
gmpscan (const struct gmp_doscan_funs_t *funs, void *data,
151
const struct gmp_doscan_params_t *p, void *dst)
153
int chars, c, base, first, width, seen_point, seen_digit;
154
size_t s_upto, s_alloc;
158
TRACE (printf ("gmpscan\n"));
160
ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z');
162
c = (*funs->get) (data);
170
width = (p->width == 0 ? INT_MAX-1 : p->width);
172
s_alloc = S_ALLOC_STEP;
173
s = (*__gmp_allocate_func) (s_alloc);
184
/* don't store '+', it's not accepted by mpz_set_str etc */
198
if (c == 'x' || c == 'X')
213
if (! (isascii (c) && isxdigit (c)))
218
if (! (isascii (c) && isdigit (c)))
220
if (base == 8 && (c == '8' || c == '9'))
232
if (p->type == 'F' && ! seen_point)
235
/* For a multi-character decimal point, if the first character is
236
present then all of it must be, otherwise the input is
237
considered invalid. */
240
point = localeconv()->decimal_point;
261
goto store_get_digits;
267
if (p->type == 'F' && (c == 'e' || c == 'E'))
269
/* must have at least one digit in the mantissa, just an exponent
270
is not good enough */
282
if (p->type == 'Q' && c == '/')
284
/* must have at least one digit in the numerator */
288
/* now look for at least one digit in the denominator */
291
/* allow the base to be redetermined for "%i" */
308
TRACE (printf (" convert \"%s\"\n", s));
310
/* We ought to have parsed out a valid string above, so just test
311
mpz_set_str etc with an ASSERT. */
314
ASSERT (p->base == 10);
315
ASSERT_NOCARRY (mpf_set_str (dst, s, 10));
318
ASSERT_NOCARRY (mpq_set_str (dst, s, p->base));
321
ASSERT_NOCARRY (mpz_set_str (dst, s, p->base));
331
ASSERT (chars <= width+1);
332
if (chars != width+1)
334
(*funs->unget) (c, data);
335
TRACE (printf (" ungetc %d, to give %d chars\n", c, chars-1));
339
(*__gmp_free_func) (s, s_alloc);
343
TRACE (printf (" invalid\n"));
347
TRACE (printf (" return %d chars (cf width %d)\n", chars, width));
352
/* Read and discard whitespace, if any. Return number of chars skipped.
353
Whitespace skipping never provokes the EOF return from __gmp_doscan, so
354
it's not necessary to watch for EOF from funs->get. */
356
skip_white (const struct gmp_doscan_funs_t *funs, void *data)
363
c = (funs->get) (data);
366
while (isascii (c) && isspace (c));
368
(funs->unget) (c, data);
371
TRACE (printf (" skip white %d\n", ret));
377
__gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data,
378
const char *orig_fmt, va_list orig_ap)
380
struct gmp_doscan_params_t param;
383
const char *fmt, *this_fmt, *end_fmt;
384
size_t orig_fmt_len, alloc_fmt_size, len;
385
int new_fields, new_chars;
390
TRACE (printf ("__gmp_doscan \"%s\"\n", orig_fmt);
391
if (funs->scan == (gmp_doscan_scan_t) sscanf)
392
printf (" s=\"%s\"\n", (const char *) data));
394
/* Don't modify orig_ap, if va_list is actually an array and hence call by
395
reference. It could be argued that it'd be more efficient to leave
396
callers to make a copy if they care, but doing so here is going to be a
397
very small part of the total work, and we may as well keep applications
399
va_copy (ap, orig_ap);
401
/* Parts of the format string are going to be copied so that a " %n" can
402
be appended. alloc_fmt is some space for that. orig_fmt_len+4 will be
403
needed if fmt consists of a single "%" specifier, but otherwise is an
404
overestimate. We're not going to be very fast here, so use
405
__gmp_allocate_func rather than TMP_ALLOC. */
406
orig_fmt_len = strlen (orig_fmt);
407
alloc_fmt_size = orig_fmt_len + 4;
408
alloc_fmt = (*__gmp_allocate_func) (alloc_fmt_size);
411
end_fmt = orig_fmt + orig_fmt_len;
421
if (isascii (fchar) && isspace (fchar))
423
chars += skip_white (funs, data);
431
c = (funs->get) (data);
434
(funs->unget) (c, data);
453
TRACE (printf (" this_fmt \"%s\"\n", this_fmt));
457
ASSERT (fmt <= end_fmt);
462
case '\0': /* unterminated % sequence */
466
case '%': /* literal % */
469
case '[': /* character range */
473
/* ']' allowed as the first char (possibly after '^') */
478
ASSERT (fmt <= end_fmt);
481
/* unterminated % sequence */
490
case 'c': /* characters */
491
case 's': /* string of non-whitespace */
492
case 'p': /* pointer */
494
len = fmt - this_fmt;
495
memcpy (alloc_fmt, this_fmt, len);
496
alloc_fmt[len++] = '%';
497
alloc_fmt[len++] = 'n';
498
alloc_fmt[len] = '\0';
500
TRACE (printf (" scan \"%s\"\n", alloc_fmt);
501
if (funs->scan == (gmp_doscan_scan_t) sscanf)
502
printf (" s=\"%s\"\n", (const char *) data));
507
new_fields = (*funs->scan) (data, alloc_fmt, &new_chars);
508
ASSERT (new_fields == 0 || new_fields == EOF);
512
new_fields = (*funs->scan) (data, alloc_fmt,
513
va_arg (ap, void *), &new_chars);
514
ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF);
517
goto done; /* invalid input */
520
ASSERT (new_chars != -1);
522
TRACE (printf (" new_fields %d new_chars %d\n",
523
new_fields, new_chars));
525
if (new_fields == -1)
526
goto eof_no_match; /* EOF before anything matched */
528
/* Under param.ignore, when new_fields==0 we don't know if
529
it's a successful match or an invalid field. new_chars
530
won't have been assigned if it was an invalid field. */
532
goto done; /* invalid input */
535
(*funs->step) (data, new_chars);
542
case 'd': /* decimal */
543
case 'e': /* float */
544
case 'E': /* float */
545
case 'f': /* float */
546
case 'g': /* float */
547
case 'G': /* float */
548
case 'u': /* decimal */
550
if (param.type != 'F' && param.type != 'Q' && param.type != 'Z')
553
chars += skip_white (funs, data);
555
new_chars = gmpscan (funs, data, ¶m,
556
param.ignore ? NULL : va_arg (ap, void*));
562
ASSERT (new_chars >= 0);
564
goto increment_fields;
566
case 'a': /* glibc allocate string */
567
case '\'': /* glibc digit groupings */
570
case 'F': /* mpf_t */
571
case 'j': /* intmax_t */
572
case 'L': /* long long */
573
case 'q': /* quad_t */
574
case 'Q': /* mpq_t */
575
case 't': /* ptrdiff_t */
576
case 'z': /* size_t */
577
case 'Z': /* mpz_t */
582
case 'h': /* short or char */
583
if (param.type != 'h')
585
param.type = 'H'; /* internal code for "hh" */
592
case 'l': /* long, long long, double or long double */
593
if (param.type != 'l')
595
param.type = 'L'; /* "ll" means "L" */
602
p = va_arg (ap, void *);
603
TRACE (printf (" store %%n to %p\n", p));
604
switch (param.type) {
605
case '\0': * (int *) p = chars; break;
606
case 'F': mpf_set_si ((mpf_ptr) p, (long) chars); break;
607
case 'H': * (char *) p = chars; break;
608
case 'h': * (short *) p = chars; break;
610
case 'j': * (intmax_t *) p = chars; break;
612
case 'j': ASSERT_FAIL (intmax_t not available); break;
614
case 'l': * (long *) p = chars; break;
615
#if HAVE_QUAD_T && HAVE_LONG_LONG
617
ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));
620
case 'q': ASSERT_FAIL (quad_t not available); break;
623
case 'L': * (long long *) p = chars; break;
625
case 'L': ASSERT_FAIL (long long not available); break;
627
case 'Q': mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break;
629
case 't': * (ptrdiff_t *) p = chars; break;
631
case 't': ASSERT_FAIL (ptrdiff_t not available); break;
633
case 'z': * (size_t *) p = chars; break;
634
case 'Z': mpz_set_si ((mpz_ptr) p, (long) chars); break;
635
default: ASSERT (0); break;
649
case '0': case '1': case '2': case '3': case '4':
650
case '5': case '6': case '7': case '8': case '9':
653
param.width = param.width * 10 + (fchar-'0');
655
} while (isascii (fchar) && isdigit (fchar));
656
fmt--; /* unget the non-digit */
664
/* something invalid in a % sequence */
672
(*__gmp_free_func) (alloc_fmt, alloc_fmt_size);