1
/* -*- Mode: C; c-file-style: "python" -*- */
6
/* ascii character tests (as opposed to locale tests) */
7
/* True when c is one of the six ASCII whitespace characters: space,
   form feed, newline, carriage return, horizontal tab or vertical tab.
   The result does not depend on the current locale.  The argument is
   expanded several times, so it must not have side effects. */
#define ISSPACE(c)  ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
                     (c) == '\r' || (c) == '\t' || (c) == '\v')
9
/* Locale-independent check that c lies in the ASCII range '0'..'9'.
   c is expanded twice, so pass an expression without side effects. */
#define ISDIGIT(c)  ('0' <= (c) && '9' >= (c))
14
* @nptr: the string to convert to a numeric value.
15
* @endptr: if non-%NULL, it receives a pointer to the character after
16
* the last character used in the conversion.
18
* Converts a string to a #gdouble value.
19
* This function behaves like the standard strtod() function
20
* does in the C locale. It does this without actually
21
* changing the current locale, since that would not be
24
* This function is typically used when reading configuration
25
* files or other non-user input that should be locale independent.
26
* To handle input from the user you should normally use the
27
* locale-sensitive system strtod() function.
29
* If the correct value would cause overflow, plus or minus %HUGE_VAL
30
* is returned (according to the sign of the value), and %ERANGE is
31
* stored in %errno. If the correct value would cause underflow,
32
* zero is returned and %ERANGE is stored in %errno.
33
* If memory allocation fails, %ENOMEM is stored in %errno.
35
* This function resets %errno before calling strtod() so that
36
* you can reliably detect overflow and underflow.
38
* Return value: the #gdouble value.
41
PyOS_ascii_strtod(const char *nptr, char **endptr)
45
struct lconv *locale_data;
46
const char *decimal_point;
47
size_t decimal_point_len;
48
const char *p, *decimal_point_pos;
49
const char *end = NULL; /* Silence gcc */
50
const char *digits_pos = NULL;
57
locale_data = localeconv();
58
decimal_point = locale_data->decimal_point;
59
decimal_point_len = strlen(decimal_point);
61
assert(decimal_point_len != 0);
63
decimal_point_pos = NULL;
65
/* We process any leading whitespace and the optional sign manually,
66
then pass the remainder to the system strtod. This ensures that
67
the result of an underflow has the correct sign. (bug #1725) */
70
/* Skip leading space */
74
/* Process leading sign, if present */
78
} else if (*p == '+') {
82
/* What's left should begin with a digit, a decimal point, or one of
83
the letters i, I, n, N. It should not begin with 0x or 0X */
85
*p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
87
(*p == '0' && (p[1] == 'x' || p[1] == 'X')))
90
*endptr = (char*)nptr;
96
if (decimal_point[0] != '.' ||
97
decimal_point[1] != 0)
104
decimal_point_pos = p++;
109
if (*p == 'e' || *p == 'E')
111
if (*p == '+' || *p == '-')
117
else if (strncmp(p, decimal_point, decimal_point_len) == 0)
119
/* Python bug #1417699 */
121
*endptr = (char*)nptr;
125
/* For the other cases, we need not convert the decimal
129
/* Set errno to zero, so that we can distinguish zero results
133
if (decimal_point_pos)
137
/* We need to convert the '.' to the locale specific decimal
139
copy = (char *)PyMem_MALLOC(end - digits_pos +
140
1 + decimal_point_len);
143
*endptr = (char *)nptr;
149
memcpy(c, digits_pos, decimal_point_pos - digits_pos);
150
c += decimal_point_pos - digits_pos;
151
memcpy(c, decimal_point, decimal_point_len);
152
c += decimal_point_len;
153
memcpy(c, decimal_point_pos + 1,
154
end - (decimal_point_pos + 1));
155
c += end - (decimal_point_pos + 1);
158
val = strtod(copy, &fail_pos);
162
if (fail_pos > decimal_point_pos)
163
fail_pos = (char *)digits_pos +
165
(decimal_point_len - 1);
167
fail_pos = (char *)digits_pos +
175
val = strtod(digits_pos, &fail_pos);
178
if (fail_pos == digits_pos)
179
fail_pos = (char *)nptr;
181
if (negate && fail_pos != nptr)
190
/* Given a string that may have a decimal point in the current
191
locale, change it back to a dot. Since the string cannot get
192
longer, no need for a maximum buffer size parameter. */
193
Py_LOCAL_INLINE(void)
194
change_decimal_from_locale_to_dot(char* buffer)
196
struct lconv *locale_data = localeconv();
197
const char *decimal_point = locale_data->decimal_point;
199
if (decimal_point[0] != '.' || decimal_point[1] != 0) {
200
size_t decimal_point_len = strlen(decimal_point);
202
if (*buffer == '+' || *buffer == '-')
204
while (isdigit(Py_CHARMASK(*buffer)))
206
if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
209
if (decimal_point_len > 1) {
210
/* buffer needs to get smaller */
211
size_t rest_len = strlen(buffer +
212
(decimal_point_len - 1));
214
buffer + (decimal_point_len - 1),
216
buffer[rest_len] = 0;
223
/* From the C99 standard, section 7.19.6:
224
The exponent always contains at least two digits, and only as many more digits
225
as necessary to represent the exponent.
227
#define MIN_EXPONENT_DIGITS 2
229
/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
231
Py_LOCAL_INLINE(void)
232
ensure_minumim_exponent_length(char* buffer, size_t buf_size)
234
char *p = strpbrk(buffer, "eE");
235
if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
237
int exponent_digit_cnt = 0;
238
int leading_zero_cnt = 0;
239
int in_leading_zeros = 1;
240
int significant_digit_cnt;
242
/* Skip over the exponent and the sign. */
245
/* Find the end of the exponent, keeping track of leading
247
while (*p && isdigit(Py_CHARMASK(*p))) {
248
if (in_leading_zeros && *p == '0')
251
in_leading_zeros = 0;
253
++exponent_digit_cnt;
256
significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
257
if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
258
/* If there are exactly 2 digits, we're done,
259
regardless of what they contain */
261
else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
264
/* There are more than 2 digits in the exponent. See
265
if we can delete some of the leading zeros */
266
if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
267
significant_digit_cnt = MIN_EXPONENT_DIGITS;
268
extra_zeros_cnt = exponent_digit_cnt -
269
significant_digit_cnt;
271
/* Delete extra_zeros_cnt worth of characters from the
272
front of the exponent */
273
assert(extra_zeros_cnt >= 0);
275
/* Add one to significant_digit_cnt to copy the
276
trailing 0 byte, thus setting the length */
278
start + extra_zeros_cnt,
279
significant_digit_cnt + 1);
282
/* If there are fewer than 2 digits, add zeros
283
until there are 2, if there's enough room */
284
int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
285
if (start + zeros + exponent_digit_cnt + 1
286
< buffer + buf_size) {
287
memmove(start + zeros, start,
288
exponent_digit_cnt + 1);
289
memset(start, '0', zeros);
295
/* Ensure that buffer has a decimal point in it. The decimal point
296
will not be in the current locale, it will always be '.' */
297
Py_LOCAL_INLINE(void)
298
ensure_decimal_point(char* buffer, size_t buf_size)
300
int insert_count = 0;
301
char* chars_to_insert;
303
/* search for the first non-digit character */
305
if (*p == '-' || *p == '+')
306
/* Skip leading sign, if present. I think this could only
307
ever be '-', but it can't hurt to check for both. */
309
while (*p && isdigit(Py_CHARMASK(*p)))
313
if (isdigit(Py_CHARMASK(*(p+1)))) {
314
/* Nothing to do, we already have a decimal
315
point and a digit after it */
318
/* We have a decimal point, but no following
319
digit. Insert a zero after the decimal. */
321
chars_to_insert = "0";
326
chars_to_insert = ".0";
330
size_t buf_len = strlen(buffer);
331
if (buf_len + insert_count + 1 >= buf_size) {
332
/* If there is not enough room in the buffer
333
for the additional text, just skip it. It's
334
not worth generating an error over. */
337
memmove(p + insert_count, p,
338
buffer + strlen(buffer) - p + 1);
339
memcpy(p, chars_to_insert, insert_count);
344
/* Add the locale specific grouping characters to buffer. Note
345
that any decimal point (if it's present) in buffer is already
346
locale-specific. Return 0 on error, else 1. */
348
add_thousands_grouping(char* buffer, size_t buf_size)
350
Py_ssize_t len = strlen(buffer);
351
struct lconv *locale_data = localeconv();
352
const char *decimal_point = locale_data->decimal_point;
354
/* Find the decimal point, if any. We're only concerned
355
about the characters to the left of the decimal when
357
char *p = strstr(buffer, decimal_point);
359
/* No decimal, use the entire string. */
361
/* If any exponent, adjust p. */
362
p = strpbrk(buffer, "eE");
364
/* No exponent and no decimal. Use the entire
368
/* At this point, p points just past the right-most character we
369
want to format. We need to add the grouping string for the
370
characters between buffer and p. */
371
return _PyBytes_InsertThousandsGrouping(buffer, len, p-buffer,
375
/* see FORMATBUFLEN in unicodeobject.c */
376
/* Capacity, in bytes, of the tmp_format scratch array in which
   PyOS_ascii_formatd builds its modified copy of the format string.
   Formats too long to fit in that array are rejected before copying. */
#define FLOAT_FORMATBUFLEN 120
379
* PyOS_ascii_formatd:
380
* @buffer: A buffer to place the resulting string in
381
* @buf_size: The length of the buffer.
382
* @format: The printf()-style format to use for the
383
* code to use for converting.
384
* @d: The #gdouble to convert
386
* Converts a #gdouble to a string, using the '.' as
387
* decimal point. To format the number you pass in
388
* a printf()-style format string. Allowed conversion
389
* specifiers are 'e', 'E', 'f', 'F', 'g', 'G', 'n', and 'Z'.
391
* 'n' is the same as 'g', except it uses the current locale.
392
* 'Z' is the same as 'g', except it always has a decimal and
393
* at least one digit after the decimal.
395
* Return value: The pointer to the buffer with the converted string.
398
PyOS_ascii_formatd(char *buffer,
404
size_t format_len = strlen(format);
406
/* For type 'n', we need to make a copy of the format string, because
407
we're going to modify 'n' -> 'g', and format is const char*, so we
408
can't modify it directly. FLOAT_FORMATBUFLEN should be longer than
409
we ever need this to be. There's an upcoming check to ensure it's
411
/* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
412
also with at least one character past the decimal. */
413
char tmp_format[FLOAT_FORMATBUFLEN];
415
/* The last character in the format string must be the format char */
416
format_char = format[format_len - 1];
418
if (format[0] != '%')
421
/* I'm not sure why this test is here. It's ensuring that the format
422
string after the first character doesn't have a single quote, a
423
lowercase l, or a percent. This is the reverse of the commented-out
424
test about 10 lines ago. */
425
if (strpbrk(format + 1, "'l%"))
428
/* Also curious about this function is that it accepts format strings
429
like "%xg", which are invalid for floats. In general, the
430
interface to this function is not very good, but changing it is
431
difficult because it's a public API. */
433
if (!(format_char == 'e' || format_char == 'E' ||
434
format_char == 'f' || format_char == 'F' ||
435
format_char == 'g' || format_char == 'G' ||
436
format_char == 'n' || format_char == 'Z'))
439
/* Map 'n' or 'Z' format_char to 'g', by copying the format string and
440
replacing the final char with a 'g' */
441
if (format_char == 'n' || format_char == 'Z') {
442
if (format_len + 1 >= sizeof(tmp_format)) {
443
/* The format won't fit in our copy. Error out. In
444
practice, this will never happen and will be
445
detected by returning NULL */
448
strcpy(tmp_format, format);
449
tmp_format[format_len - 1] = 'g';
454
/* Have PyOS_snprintf do the hard work */
455
PyOS_snprintf(buffer, buf_size, format, d);
457
/* Do various fixups on the return string */
459
/* Get the current locale, and find the decimal point string.
460
Convert that string back to a dot. Do not do this if using the
461
'n' (number) format code, since we want to keep the localized
462
decimal point in that case. */
463
if (format_char != 'n')
464
change_decimal_from_locale_to_dot(buffer);
466
/* If an exponent exists, ensure that the exponent is at least
467
MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
468
for the extra zeros. Also, if there are more than
469
MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
470
back to MIN_EXPONENT_DIGITS */
471
ensure_minumim_exponent_length(buffer, buf_size);
473
/* If format_char is 'Z', make sure we have at least one character
474
after the decimal point (and make sure we have a decimal point). */
475
if (format_char == 'Z')
476
ensure_decimal_point(buffer, buf_size);
478
/* If format_char is 'n', add the thousands grouping. */
479
if (format_char == 'n')
480
if (!add_thousands_grouping(buffer, buf_size))
487
PyOS_ascii_atof(const char *nptr)
489
return PyOS_ascii_strtod(nptr, NULL);