1
/**********************************************************************
2
* $Id: e00read.c,v 1.10 2009-02-24 20:03:50 aboudreault Exp $
5
* Project: Compressed E00 Read/Write library
7
* Purpose: Functions to read Compressed E00 files and return a stream
8
* of uncompressed lines.
9
* Author: Daniel Morissette, dmorissette@mapgears.com
12
* Revision 1.10 2009-02-24 20:03:50 aboudreault
13
* Added a short manual pages (#1875)
14
* Updated documentation and code examples (#247)
16
* Revision 1.9 2005-09-17 14:22:05 daniel
17
* Switch to MIT license, update refs to website and email address, and
18
* prepare for 1.0.0 release.
20
* Revision 1.8 1999/02/25 18:45:56 daniel
21
* Now use CPL for Error handling, Memory allocation, and File access
23
* Revision 1.7 1999/01/08 17:39:08 daniel
24
* Added E00ReadCallbackOpen()
26
* Revision 1.6 1998/11/13 16:34:08 daniel
27
* Fixed '\r' problem when reading E00 files from a PC under Unix
29
* Revision 1.5 1998/11/13 15:48:08 daniel
30
* Simplified the decoding of the compression codes for numbers
31
* (use a logical rule instead of going case by case)
33
* Revision 1.4 1998/11/02 18:34:29 daniel
34
* Added E00ErrorReset() calls. Replace "EXP 1" by "EXP 0" on read.
36
* Revision 1.1 1998/10/29 13:26:00 daniel
39
**********************************************************************
40
* Copyright (c) 1998-2005, Daniel Morissette
42
* Permission is hereby granted, free of charge, to any person obtaining a
43
* copy of this software and associated documentation files (the "Software"),
44
* to deal in the Software without restriction, including without limitation
45
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
46
* and/or sell copies of the Software, and to permit persons to whom the
47
* Software is furnished to do so, subject to the following conditions:
49
* The above copyright notice and this permission notice shall be included
50
* in all copies or substantial portions of the Software.
52
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
53
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
54
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
55
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
56
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
57
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
58
* DEALINGS IN THE SOFTWARE.
60
**********************************************************************/
69
static void _ReadNextSourceLine(E00ReadPtr psInfo);
70
static const char *_UncompressNextLine(E00ReadPtr psInfo);
72
/**********************************************************************
75
* Given a pre-initialized E00ReadPtr, this function will make sure
76
* that the file is really a E00 file, and also establish if it is
77
* compressed or not, setting the corresponding structure members along the way.
79
* Returns NULL (and destroys the E00ReadPtr) if the file does not
80
* appear to be a valid E00 file.
81
**********************************************************************/
82
static E00ReadPtr _E00ReadTestOpen(E00ReadPtr psInfo)
85
/* Check that the file is in E00 format.
87
_ReadNextSourceLine(psInfo);
88
if (!psInfo->bEOF && strncmp(psInfo->szInBuf, "EXP ", 4) == 0)
90
/* We should be in presence of a valid E00 file...
91
* Is the file compressed or not?
93
* Note: we cannot really rely on the number that follows the EXP to
94
* establish if the file is compressed since we sometimes encounter
95
* uncompressed files that start with a "EXP 1" line!!!
97
* The best test is to read the first non-empty line: if the file is
98
* compressed, the first line of data should be 79 or 80 characters
99
* long and contain several '~' characters.
103
_ReadNextSourceLine(psInfo);
104
}while(!psInfo->bEOF &&
105
(psInfo->szInBuf[0] == '\0' || isspace(psInfo->szInBuf[0])) );
108
(strlen(psInfo->szInBuf)==79 || strlen(psInfo->szInBuf)==80) &&
109
strchr(psInfo->szInBuf, '~') != NULL )
110
psInfo->bIsCompressed = 1;
112
/* Rewind the read pointer so that reading restarts at the beginning of the file
114
E00ReadRewind(psInfo);
125
/**********************************************************************
128
* Try to open a E00 file given its filename and return a E00ReadPtr handle.
130
* Returns NULL if the file could not be opened or if it does not
131
* appear to be a valid E00 file.
132
**********************************************************************/
133
E00ReadPtr E00ReadOpen(const char *pszFname)
135
E00ReadPtr psInfo = NULL;
142
fp = VSIFOpen(pszFname, "rt");
145
CPLError(CE_Failure, CPLE_OpenFailed,
146
"Failed to open %s: %s", pszFname, strerror(errno));
150
/* File was successfully opened, allocate and initialize a
151
* E00ReadPtr handle and check that the file is valid.
153
psInfo = (E00ReadPtr)CPLCalloc(1, sizeof(struct _E00ReadInfo));
157
psInfo = _E00ReadTestOpen(psInfo);
161
CPLError(CE_Failure, CPLE_OpenFailed,
162
"%s is not a valid E00 file.", pszFname);
168
/**********************************************************************
169
* E00ReadCallbackOpen()
171
* This is an alternative to E00ReadOpen() for cases where you want to
172
* do all the file management yourself. You open/close the file yourself
173
* and provide 2 callback functions: to read from the file and rewind the
174
* file pointer. pRefData is your handle on the physical file and can
175
* be whatever you want... it is not used by the library, it will be
176
* passed directly to your 2 callback functions when they are called.
178
* The callback functions must have the following C prototype:
180
* const char *myReadNextLine(void *pRefData);
181
* void myReadRewind(void *pRefData);
183
* myReadNextLine() should return a reference to its own internal
184
* buffer, or NULL if an error happens or EOF is reached.
186
* E00ReadCallbackOpen() returns a E00ReadPtr handle or NULL if the file
187
* does not appear to be a valid E00 file.
188
**********************************************************************/
189
E00ReadPtr E00ReadCallbackOpen(void *pRefData,
190
const char * (*pfnReadNextLine)(void *),
191
void (*pfnReadRewind)(void *))
193
E00ReadPtr psInfo = NULL;
197
/* Make sure we received valid function pointers
199
if (pfnReadNextLine == NULL || pfnReadRewind == NULL)
201
CPLError(CE_Failure, CPLE_IllegalArg,
202
"Invalid function pointers!");
206
/* Allocate and initialize a
207
* E00ReadPtr handle and check that the file is valid.
209
psInfo = (E00ReadPtr)CPLCalloc(1, sizeof(struct _E00ReadInfo));
211
psInfo->pRefData = pRefData;
212
psInfo->pfnReadNextLine = pfnReadNextLine;
213
psInfo->pfnReadRewind = pfnReadRewind;
215
psInfo = _E00ReadTestOpen(psInfo);
219
CPLError(CE_Failure, CPLE_OpenFailed,
220
"This is not a valid E00 file.");
226
/**********************************************************************
229
* Close input file and release any memory used by the E00ReadPtr.
230
**********************************************************************/
231
void E00ReadClose(E00ReadPtr psInfo)
238
VSIFClose(psInfo->fp);
243
/**********************************************************************
246
* Rewind the E00ReadPtr. Allows starting another read pass on the input file.
248
**********************************************************************/
249
void E00ReadRewind(E00ReadPtr psInfo)
253
psInfo->szInBuf[0] = psInfo->szOutBuf[0] = '\0';
254
psInfo->iInBufPtr = 0;
256
psInfo->nInputLineNo = 0;
258
if (psInfo->pfnReadRewind == NULL)
259
VSIRewind(psInfo->fp);
261
psInfo->pfnReadRewind(psInfo->pRefData);
266
/**********************************************************************
269
* Return the next line of input from the E00 file or NULL if we reached EOF.
271
* Returns a reference to an internal buffer whose contents will be valid
272
* only until the next call to this function.
273
**********************************************************************/
274
const char *E00ReadNextLine(E00ReadPtr psInfo)
276
const char *pszLine = NULL;
281
if (psInfo && !psInfo->bEOF)
283
if (!psInfo->bIsCompressed)
285
/* Uncompressed file... return line directly.
287
_ReadNextSourceLine(psInfo);
288
pszLine = psInfo->szInBuf;
290
else if (psInfo->bIsCompressed && psInfo->nInputLineNo == 0)
292
/* Header line in a compressed file... return line
293
* after replacing "EXP 1" with "EXP 0". _E00ReadTestOpen()
294
* has already verified that this line starts with "EXP "
296
_ReadNextSourceLine(psInfo);
297
if ( (pszPtr = strstr(psInfo->szInBuf, " 1")) != NULL)
299
pszLine = psInfo->szInBuf;
303
if (psInfo->nInputLineNo == 1)
305
/* We just read the header line... reload the input buffer
307
_ReadNextSourceLine(psInfo);
310
/* Uncompress the next line of input and return it
312
pszLine = _UncompressNextLine(psInfo);
315
/* If we just reached EOF then make sure we don't add an extra
316
* empty line at the end of the uncompressed output.
318
if (psInfo->bEOF && strlen(pszLine) == 0)
325
/**********************************************************************
326
* _ReadNextSourceLine()
328
* Loads the next line from the source file in psInfo.
330
* psInfo->bEOF should be checked after this call.
331
**********************************************************************/
332
static void _ReadNextSourceLine(E00ReadPtr psInfo)
336
psInfo->iInBufPtr = 0;
337
psInfo->szInBuf[0] = '\0';
339
/* Read either using VSIFGets() or psInfo->pfnReadNextLine()
340
* depending on the way the file was opened...
342
if (psInfo->pfnReadNextLine == NULL)
344
if (VSIFGets(psInfo->szInBuf,E00_READ_BUF_SIZE,psInfo->fp) == NULL)
354
pszLine = psInfo->pfnReadNextLine(psInfo->pRefData);
357
strncpy(psInfo->szInBuf, pszLine, E00_READ_BUF_SIZE);
369
/* A new line was successfully read. Remove trailing '\n' if any.
370
* (Note: For Unix systems, we also have to check for '\r')
373
nLen = strlen(psInfo->szInBuf);
374
while(nLen > 0 && (psInfo->szInBuf[nLen-1] == '\n' ||
375
psInfo->szInBuf[nLen-1] == '\r' ) )
378
psInfo->szInBuf[nLen] = '\0';
381
psInfo->nInputLineNo++;
387
/**********************************************************************
388
* _GetNextSourceChar()
390
* Returns the next char from the source file input buffer... and
391
* reload the input buffer when necessary... this function makes the
392
* whole input file appear as one huge null-terminated string with
393
* no line delimiters.
395
* Will return '\0' when EOF is reached.
396
**********************************************************************/
397
static char _GetNextSourceChar(E00ReadPtr psInfo)
403
if (psInfo->szInBuf[psInfo->iInBufPtr] == '\0')
405
_ReadNextSourceLine(psInfo);
406
c = _GetNextSourceChar(psInfo);
410
c = psInfo->szInBuf[psInfo->iInBufPtr++];
417
/**********************************************************************
420
* Reverse the effect of the previous call to _GetNextSourceChar() by
421
* moving the input buffer pointer back 1 character.
423
* This function can be called only once per call to _GetNextSourceChar()
424
* (i.e. you cannot unget more than one character) otherwise the pointer
425
* could move before the beginning of the input buffer.
426
**********************************************************************/
427
static void _UngetSourceChar(E00ReadPtr psInfo)
429
if (psInfo->iInBufPtr > 0)
433
/* This error can happen only if _UngetSourceChar() is called
434
* twice in a row (which should never happen!).
436
CPLError(CE_Failure, CPLE_AssertionFailed,
437
"UNEXPECTED INTERNAL ERROR: _UngetSourceChar() "
438
"failed while reading line %d.", psInfo->nInputLineNo);
442
/**********************************************************************
443
* _UncompressNextLine()
445
* Uncompress one line of input and return a reference to an internal
446
* buffer containing the uncompressed output.
447
**********************************************************************/
448
static const char *_UncompressNextLine(E00ReadPtr psInfo)
451
int bEOL = 0; /* Set to 1 when End of Line reached */
452
int iOutBufPtr = 0, i, n;
453
int iDecimalPoint, bOddNumDigits, iCurDigit;
455
int bPreviousCodeWasNumeric = 0;
457
while(!bEOL && (c=_GetNextSourceChar(psInfo)) != '\0')
461
/* Normal character... just copy it
463
psInfo->szOutBuf[iOutBufPtr++] = c;
464
bPreviousCodeWasNumeric = 0;
468
/* ========================================================
469
* Found an encoded sequence.
470
* =======================================================*/
471
c = _GetNextSourceChar(psInfo);
473
/* --------------------------------------------------------
474
* Compression level 1: only spaces, '~' and '\n' are encoded
475
* -------------------------------------------------------*/
478
/* "~ " followed by number of spaces
480
c = _GetNextSourceChar(psInfo);
483
psInfo->szOutBuf[iOutBufPtr++] = ' ';
484
bPreviousCodeWasNumeric = 0;
491
bPreviousCodeWasNumeric = 0;
493
else if (bPreviousCodeWasNumeric)
495
/* If the previous code was numeric, then the only valid code
496
* sequences are the ones above: "~ " and "~}". If we end up
497
* here, it is because the number was followed by a '~' but
498
* this '~' was not a code, it only marked the end of a
499
* number that was not followed by any space.
501
* We should simply ignore the '~' and return the character
502
* that follows it directly.
504
psInfo->szOutBuf[iOutBufPtr++] = c;
505
bPreviousCodeWasNumeric = 0;
507
else if (c == '~' || c == '-')
509
/* "~~" and "~-" are simple escape sequences for '~' and '-'
511
psInfo->szOutBuf[iOutBufPtr++] = c;
513
/* --------------------------------------------------------
514
* Compression level 2: numeric values are encoded.
516
* All codes for this level are in the form "~ c0 c1 c2 ... cn"
519
* ~ marks the beginning of a new code sequence
521
* c0 is a single character code defining the format
522
* of the number (decimal position, exponent,
523
* and even or odd number of digits)
525
* c1 c2 ... cn each of these characters represent a pair of
526
* digits of the encoded value with '!' == 00
527
* values 92..99 are encoded on 2 chars that
528
* must be added to each other
529
* (i.e. 92 == }!, 93 == }", ...)
531
* The sequence ends with a ' ' or a '~' character
532
* -------------------------------------------------------*/
533
else if (c >= '!' && c <= 'z')
535
/* The format code defines 3 characteristics of the final number:
536
* - Presence of a decimal point and its position
537
* - Presence of an exponent, and its sign
538
* - Odd or even number of digits
541
iDecimalPoint = n % 15; /* 0 = no decimal point */
542
bOddNumDigits = n / 45; /* 0 = even num.digits, 1 = odd */
546
else if (n % 3 == 2 )
551
/* Decode the c1 c2 ... cn value and apply the format.
552
* Read characters until we encounter a ' ' or a '~'
555
while((c=_GetNextSourceChar(psInfo)) != '\0' &&
556
c != ' ' && c != '~')
559
if (n == 92 && (c=_GetNextSourceChar(psInfo)) != '\0')
562
psInfo->szOutBuf[iOutBufPtr++] = '0' + n/10;
564
if (++iCurDigit == iDecimalPoint)
565
psInfo->szOutBuf[iOutBufPtr++] = '.';
567
psInfo->szOutBuf[iOutBufPtr++] = '0' + n%10;
569
if (++iCurDigit == iDecimalPoint)
570
psInfo->szOutBuf[iOutBufPtr++] = '.';
573
if (c == '~' || c == ' ')
575
bPreviousCodeWasNumeric = 1;
576
_UngetSourceChar(psInfo);
579
/* If odd number of digits, then flush the last one
584
/* Insert the exponent string before the 2 last digits
585
* (we assume the exponent string is 2 chars. long)
591
psInfo->szOutBuf[iOutBufPtr] =
592
psInfo->szOutBuf[iOutBufPtr-2];
593
psInfo->szOutBuf[iOutBufPtr-2] = pszExp[i];
600
/* Unsupported code sequence... this is a possibility
601
* given the fact that this library was written by
602
* reverse-engineering the format!
604
* Send an error to the user and abort.
606
* If this error ever happens, and you are convinced that
607
* the input file is not corrupted, then please report it to
608
* me at dmorissette@mapgears.com, quoting the section of the input
609
* file that produced it, and I'll do my best to add support
610
* for this code sequence.
612
CPLError(CE_Failure, CPLE_NotSupported,
613
"Unexpected code \"~%c\" encountered in line %d.",
614
c, psInfo->nInputLineNo);
616
/* Force the program to abort by simulating a EOF
624
/* E00 lines should NEVER be longer than 80 chars. If we passed
625
* that limit, then the input file is likely corrupt.
629
CPLError(CE_Failure, CPLE_FileIO,
630
"Uncompressed line longer than 80 chars. "
631
"Input file possibly corrupt around line %d.",
632
psInfo->nInputLineNo);
633
/* Force the program to abort by simulating a EOF
641
psInfo->szOutBuf[iOutBufPtr++] = '\0';
643
return psInfo->szOutBuf;