2
* convert from text form of arbitrary data (e.g., keys) to binary
3
* Copyright (C) 2000 Henry Spencer.
5
* This library is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU Library General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or (at your
8
* option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
10
* This library is distributed in the hope that it will be useful, but
11
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13
* License for more details.
15
* RCSID $Id: ttodata.c,v 1.12 2004/04/09 18:00:37 mcr Exp $
20
/* converters and misc */
21
static int unhex(const char *, char *, size_t);
22
static int unb64(const char *, char *, size_t);
23
static int untext(const char *, char *, size_t);
24
static const char *badch(const char *, int, char *, size_t);
26
/* internal error codes for converters */
27
#define SHORT (-2) /* internal buffer too short */
28
#define BADPAD (-3) /* bad base64 padding */
29
#define BADCH0 (-4) /* invalid character 0 */
30
#define BADCH1 (-5) /* invalid character 1 */
31
#define BADCH2 (-6) /* invalid character 2 */
32
#define BADCH3 (-7) /* invalid character 3 */
33
/*
 * BADOFF - turn a BADCHn error code back into n.
 * The BADCHn codes descend from BADCH0 one step per character position,
 * so BADCH0-(code) is the index, within the group handed to a converter,
 * of the character that was rejected; badch() uses it to fetch that
 * character for its error message.
 */
#define BADOFF(code) (BADCH0-(code))
36
- ttodatav - convert text to data, with verbose error reports
37
* If some of this looks slightly odd, it's because it has changed
38
* repeatedly (from the original atodata()) without a major rewrite.
40
const char * /* NULL on success, else literal or errp */
41
ttodatav(src, srclen, base, dst, dstlen, lenp, errp, errlen, flags)
43
size_t srclen; /* 0 means apply strlen() */
44
int base; /* 0 means figure it out */
45
char *dst; /* need not be valid if dstlen is 0 */
47
size_t *lenp; /* where to record length (NULL is nowhere) */
48
char *errp; /* error buffer */
52
size_t ingroup; /* number of input bytes converted at once */
53
char buf[4]; /* output from conversion */
54
int nbytes; /* size of output */
55
int (*decode)(const char *, char *, size_t);
65
dst = buf; /* point it somewhere valid */
70
return "input too short to be valid";
72
return "input does not begin with format prefix";
87
return "unknown format prefix";
101
if(flags & TTODATAV_IGNORESPACE) {
112
return "unknown base";
118
char stage[4]; /* staging area for group */
121
/* Grab ingroup characters into stage,
122
* squeezing out blanks if we are supposed to ignore them.
124
for (sl = 0; sl < ingroup; src++, srclen--) {
126
return "input ends in mid-byte, perhaps truncated";
127
else if (!(skipSpace && (*src == ' ' || *src == '\t')))
131
nbytes = (*decode)(stage, buf, sizeof(buf));
137
return badch(stage, nbytes, errp, errlen);
139
return "internal buffer too short (\"can't happen\")";
141
return "bad (non-zero) padding at end of base64 input";
144
return "unknown internal error";
145
for (i = 0; i < nbytes; i++) {
150
while (srclen >= 1 && skipSpace && (*src == ' ' || *src == '\t')){
154
if (underscoreok && srclen > 1 && *src == '_') {
155
/* srclen > 1 means not last character */
162
return "no data bytes specified by input";
169
- ttodata - convert text to data
171
const char * /* NULL on success, else literal */
172
ttodata(src, srclen, base, dst, dstlen, lenp)
174
size_t srclen; /* 0 means apply strlen() */
175
int base; /* 0 means figure it out */
176
char *dst; /* need not be valid if dstlen is 0 */
178
size_t *lenp; /* where to record length (NULL is nowhere) */
180
return ttodatav(src, srclen, base, dst, dstlen, lenp, (char *)NULL,
181
(size_t)0, TTODATAV_SPACECOUNTS);
185
- atodata - convert ASCII to data
186
* backward-compatibility interface
188
size_t /* 0 for failure, true length for success */
189
atodata(src, srclen, dst, dstlen)
198
err = ttodata(src, srclen, 0, dst, dstlen, &len);
205
- atobytes - convert ASCII to data bytes
206
* another backward-compatibility interface
209
atobytes(src, srclen, dst, dstlen, lenp)
216
return ttodata(src, srclen, 0, dst, dstlen, lenp);
220
- unhex - convert two ASCII hex digits to byte
222
static int /* number of result bytes, or error code */
223
unhex(src, dst, dstlen)
224
const char *src; /* known to be full length */
226
size_t dstlen; /* not large enough is a failure */
230
static char hex[] = "0123456789abcdef";
235
p = strchr(hex, *src);
237
p = strchr(hex, tolower(*src));
240
byte = (p - hex) << 4;
243
p = strchr(hex, *src);
245
p = strchr(hex, tolower(*src));
255
- unb64 - convert four ASCII base64 digits to three bytes
256
* Note that a base64 digit group is padded out with '=' if it represents
257
* fewer than three bytes: one byte is dd==, two is ddd=, three is dddd.
259
static int /* number of result bytes, or error code */
260
unb64(src, dst, dstlen)
261
const char *src; /* known to be full length */
268
static char base64[] =
269
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
274
p = strchr(base64, *src++);
278
byte1 = (p - base64) << 2; /* first six bits */
280
p = strchr(base64, *src++);
285
byte2 = p - base64; /* next six: two plus four */
286
*dst++ = byte1 | (byte2 >> 4);
287
byte1 = (byte2 & 0xf) << 4;
289
p = strchr(base64, *src++);
291
if (*(src-1) == '=' && *src == '=') {
292
if (byte1 != 0) /* bad padding */
299
byte2 = p - base64; /* next six: four plus two */
300
*dst++ = byte1 | (byte2 >> 2);
301
byte1 = (byte2 & 0x3) << 6;
303
p = strchr(base64, *src++);
305
if (*(src-1) == '=') {
306
if (byte1 != 0) /* bad padding */
312
byte2 = p - base64; /* last six */
313
*dst++ = byte1 | byte2;
319
- untext - convert one ASCII character to byte
321
static int /* number of result bytes, or error code */
322
untext(src, dst, dstlen)
323
const char *src; /* known to be full length */
325
size_t dstlen; /* not large enough is a failure */
335
- badch - produce a nice complaint about an unknown character
337
* If the compiler complains that the array bigenough[] has a negative
338
* size, that means the TTODATAV_BUF constant has been set too small.
340
static const char * /* literal or errp */
341
badch(src, errcode, errp, errlen)
344
char *errp; /* might be NULL */
347
static const char pre[] = "unknown character (`";
348
static const char suf[] = "') in input";
350
# define REQD (sizeof(pre) - 1 + sizeof(buf) - 1 + sizeof(suf))
352
char bigenough[TTODATAV_BUF - REQD]; /* see above */
356
if (errp == NULL || errlen < REQD)
357
return "unknown character in input";
359
ch = *(src + BADOFF(errcode));
365
buf[1] = ((ch & 0700) >> 6) + '0';
366
buf[2] = ((ch & 0070) >> 3) + '0';
367
buf[3] = ((ch & 0007) >> 0) + '0';
372
return (const char *)errp;
382
static void check(struct artab *r, char *buf, size_t n, err_t oops, int *status);
383
static void regress(char *pgm);
384
static void hexout(const char *s, size_t len, FILE *f);
387
- main - convert first argument to hex, or run regression
390
main(int argc, char *argv[])
403
fprintf(stderr, "Usage: %s {0x<hex>|0s<base64>|-r}\n", pgm);
407
if (strcmp(argv[1], "-r") == 0) {
408
regress(pgm); /* should not return */
409
fprintf(stderr, "%s: regress() returned?!?\n", pgm);
413
oops = ttodatav(argv[1], 0, 0, buf, sizeof(buf), &n,
414
err, sizeof(err), TTODATAV_IGNORESPACE);
416
fprintf(stderr, "%s: ttodata error `%s' in `%s'\n", pgm,
421
if (n > sizeof(buf)) {
422
p = (char *)malloc((size_t)n);
425
"%s: unable to malloc %d bytes for result\n",
429
oops = ttodata(argv[1], 0, 0, p, n, &n);
431
fprintf(stderr, "%s: error `%s' in ttodata retry?!?\n",
437
hexout(p, n, stdout);
440
i = datatot(buf, n, 'h', buf2, sizeof(buf2));
442
fprintf(stderr, "%s: datatot reports error in `%s'\n", pgm,
447
if (i > sizeof(buf2)) {
448
p2 = (char *)malloc((size_t)i);
451
"%s: unable to malloc %d bytes for result\n",
455
i = datatot(buf, n, 'h', p2, i);
457
fprintf(stderr, "%s: error in datatoa retry?!?\n", pgm);
468
- hexout - output an arbitrary-length string in hex
479
for (i = 0; i < len; i++)
480
fprintf(f, "%02x", (unsigned char)s[i]);
485
/*
 * Bias added to a test entry's base to mark it as a case that is to be
 * run with TTODATAV_IGNORESPACE; the regression loop subtracts the bias
 * again before handing the real base to ttodatav().
 */
# define IGNORESPACE_BIAS 1000
486
char *ascii; /* NULL for end */
487
char *data; /* NULL for error expected */
493
{ 0, "0xab", "\xab", },
494
{ 0, "0xabc", NULL, },
495
{ 0, "0xabcd", "\xab\xcd", },
496
{ 0, "0x0123456789", "\x01\x23\x45\x67\x89", },
497
{ 0, "0x01x", NULL, },
498
{ 0, "0xabcdef", "\xab\xcd\xef", },
499
{ 0, "0xABCDEF", "\xab\xcd\xef", },
500
{ 0, "0XaBc0eEd81f", "\xab\xc0\xee\xd8\x1f", },
501
{ 0, "0XaBc0_eEd8", "\xab\xc0\xee\xd8", },
502
{ 0, "0XaBc0_", NULL, },
503
{ 0, "0X_aBc0", NULL, },
504
{ 0, "0Xa_Bc0", NULL, },
505
{ 16, "aBc0eEd8", "\xab\xc0\xee\xd8", },
508
{ 0, "0sBA", NULL, },
509
{ 0, "0sCBA", NULL, },
510
{ 0, "0sDCBA", "\x0c\x20\x40", },
511
{ 0, "0SDCBA", "\x0c\x20\x40", },
512
{ 0, "0sDA==", "\x0c", },
513
{ 0, "0sDC==", NULL, },
514
{ 0, "0sDCA=", "\x0c\x20", },
515
{ 0, "0sDCB=", NULL, },
516
{ 0, "0sDCAZ", "\x0c\x20\x19", },
517
{ 0, "0sDCAa", "\x0c\x20\x1a", },
518
{ 0, "0sDCAz", "\x0c\x20\x33", },
519
{ 0, "0sDCA0", "\x0c\x20\x34", },
520
{ 0, "0sDCA9", "\x0c\x20\x3d", },
521
{ 0, "0sDCA+", "\x0c\x20\x3e", },
522
{ 0, "0sDCA/", "\x0c\x20\x3f", },
523
{ 0, "0sAbraCadabra+", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", },
524
{ IGNORESPACE_BIAS + 0, "0s AbraCadabra+", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", },
525
{ IGNORESPACE_BIAS + 0, "0sA braCadabra+", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", },
526
{ IGNORESPACE_BIAS + 0, "0sAb raCadabra+", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", },
527
{ IGNORESPACE_BIAS + 0, "0sAbr aCadabra+", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", },
528
{ IGNORESPACE_BIAS + 0, "0sAbra Cadabra+", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", },
529
{ IGNORESPACE_BIAS + 0, "0sAbraC adabra+", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", },
530
{ IGNORESPACE_BIAS + 0, "0sAbraCa dabra+", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", },
531
{ IGNORESPACE_BIAS + 0, "0sAbraCad abra+", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", },
532
{ IGNORESPACE_BIAS + 0, "0sAbraCada bra+", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", },
533
{ IGNORESPACE_BIAS + 0, "0sAbraCadab ra+", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", },
534
{ IGNORESPACE_BIAS + 0, "0sAbraCadabr a+", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", },
535
{ IGNORESPACE_BIAS + 0, "0sAbraCadabra +", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", },
536
{ IGNORESPACE_BIAS + 0, "0sAbraCadabra+ ", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", },
538
{ 0, "0tabc_xyz", "abc_xyz", },
539
{ 256, "abc_xyz", "abc_xyz", },
544
char *data; /* input; NULL for end */
546
int buflen; /* -1 means big buffer */
547
int outlen; /* -1 means strlen(ascii)+1 */
548
char *ascii; /* NULL for error expected */
550
{ "", 'x', -1, -1, NULL, },
551
{ "", 'X', -1, -1, NULL, },
552
{ "", 'n', -1, -1, NULL, },
553
{ "0", 'x', -1, -1, "0x30", },
554
{ "0", 'x', 0, 5, "---", },
555
{ "0", 'x', 1, 5, "", },
556
{ "0", 'x', 2, 5, "0", },
557
{ "0", 'x', 3, 5, "0x", },
558
{ "0", 'x', 4, 5, "0x3", },
559
{ "0", 'x', 5, 5, "0x30", },
560
{ "0", 'x', 6, 5, "0x30", },
561
{ "\xab\xcd", 'x', -1, -1, "0xabcd", },
562
{ "\x01\x23\x45\x67\x89", 'x', -1, -1, "0x0123456789", },
563
{ "\xab\xcd\xef", 'x', -1, -1, "0xabcdef", },
564
{ "\xab\xc0\xee\xd8\x1f", 'x', -1, -1, "0xabc0eed81f", },
565
{ "\x01\x02", 'h', -1, -1, "0x0102", },
566
{ "\x01\x02\x03\x04\x05\x06", 'h', -1, -1, "0x01020304_0506", },
567
{ "\xab\xc0\xee\xd8\x1f", 16, -1, -1, "abc0eed81f", },
568
{ "\x0c\x20\x40", 's', -1, -1, "0sDCBA", },
569
{ "\x0c\x20\x40", 's', 0, 7, "---", },
570
{ "\x0c\x20\x40", 's', 1, 7, "", },
571
{ "\x0c\x20\x40", 's', 2, 7, "0", },
572
{ "\x0c\x20\x40", 's', 3, 7, "0s", },
573
{ "\x0c\x20\x40", 's', 4, 7, "0sD", },
574
{ "\x0c\x20\x40", 's', 5, 7, "0sDC", },
575
{ "\x0c\x20\x40", 's', 6, 7, "0sDCB", },
576
{ "\x0c\x20\x40", 's', 7, 7, "0sDCBA", },
577
{ "\x0c\x20\x40", 's', 8, 7, "0sDCBA", },
578
{ "\x0c", 's', -1, -1, "0sDA==", },
579
{ "\x0c\x20", 's', -1, -1, "0sDCA=", },
580
{ "\x0c\x20\x19", 's', -1, -1, "0sDCAZ", },
581
{ "\x0c\x20\x1a", 's', -1, -1, "0sDCAa", },
582
{ "\x0c\x20\x33", 's', -1, -1, "0sDCAz", },
583
{ "\x0c\x20\x34", 's', -1, -1, "0sDCA0", },
584
{ "\x0c\x20\x3d", 's', -1, -1, "0sDCA9", },
585
{ "\x0c\x20\x3e", 's', -1, -1, "0sDCA+", },
586
{ "\x0c\x20\x3f", 's', -1, -1, "0sDCA/", },
587
{ "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", 's', -1, -1, "0sAbraCadabra+", },
588
{ "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", 64, -1, -1, "AbraCadabra+", },
589
{ NULL, 'x', -1, -1, NULL, },
593
- regress - regression-test ttodata() and datatot()
596
check(r, buf, n, oops, status)
603
if (oops != NULL && r->data == NULL)
604
{} /* error expected */
605
else if (oops != NULL) {
606
printf("`%s' gave error `%s', expecting %d `", r->ascii,
607
oops, strlen(r->data));
608
hexout(r->data, strlen(r->data), stdout);
611
} else if (r->data == NULL) {
612
printf("`%s' gave %d `", r->ascii, n);
613
hexout(buf, n, stdout);
614
printf("', expecting error\n");
616
} else if (n != strlen(r->data)) {
617
printf("length wrong in `%s': got %d `", r->ascii, n);
618
hexout(buf, n, stdout);
619
printf("', expecting %d `", strlen(r->data));
620
hexout(r->data, strlen(r->data), stdout);
623
} else if (memcmp(buf, r->data, n) != 0) {
624
printf("`%s' gave %d `", r->ascii, n);
625
hexout(buf, n, stdout);
626
printf("', expecting %d `", strlen(r->data));
627
hexout(r->data, strlen(r->data), stdout);
634
static void /* should not return at all, in fact */
644
for (r = atodatatab; r->ascii != NULL; r++) {
648
if ((base == 0 || base == IGNORESPACE_BIAS + 0) && r->ascii[0] == '0') {
649
switch (r->ascii[1]) {
665
if (base >= IGNORESPACE_BIAS) {
666
base = base - IGNORESPACE_BIAS;
667
check(r, buf, n, ttodatav(r->ascii, 0, base, buf, sizeof(buf), &n, NULL, 0, TTODATAV_IGNORESPACE), &status);
669
check(r, buf, n, ttodatav(r->ascii+2, 0, xbase, buf, sizeof(buf), &n, NULL, 0, TTODATAV_IGNORESPACE), &status);
671
check(r, buf, n, ttodata(r->ascii, 0, base, buf, sizeof(buf), &n), &status);
672
if (base == 64 || xbase == 64)
673
check(r, buf, n, ttodatav(r->ascii, 0, base, buf, sizeof(buf), &n, NULL, 0, TTODATAV_IGNORESPACE), &status);
675
check(r, buf, n, ttodata(r->ascii+2, 0, xbase, buf, sizeof(buf), &n), &status);
676
if (base == 64 || xbase == 64)
677
check(r, buf, n, ttodatav(r->ascii+2, 0, xbase, buf, sizeof(buf), &n, NULL, 0, TTODATAV_IGNORESPACE), &status);
681
for (dr = datatoatab; dr->data != NULL; dr++) {
685
n = datatot(dr->data, strlen(dr->data), dr->format, buf,
686
(dr->buflen == -1) ? sizeof(buf) : dr->buflen);
687
should = (dr->ascii == NULL) ? 0 : strlen(dr->ascii) + 1;
688
if (dr->outlen != -1)
690
if (n == 0 && dr->ascii == NULL)
691
{} /* error expected */
694
hexout(dr->data, strlen(dr->data), stdout);
695
printf("' %c gave error, expecting %d `%s'\n",
696
dr->format, should, dr->ascii);
698
} else if (dr->ascii == NULL) {
700
hexout(dr->data, strlen(dr->data), stdout);
701
printf("' %c gave %d `%.*s', expecting error\n",
702
dr->format, n, (int)n, buf);
704
} else if (n != should) {
705
printf("length wrong in `");
706
hexout(dr->data, strlen(dr->data), stdout);
707
printf("': got %d `%s'", n, buf);
708
printf(", expecting %d `%s'\n", should, dr->ascii);
710
} else if (strcmp(buf, dr->ascii) != 0) {
712
hexout(dr->data, strlen(dr->data), stdout);
713
printf("' gave %d `%s'", n, buf);
714
printf(", expecting %d `%s'\n", should, dr->ascii);
722
#endif /* TTODATA_MAIN */