1
/**********************************************************************
2
* $Id: e00read.c,v 1.9 2005/09/17 14:22:05 daniel Exp $
5
* Project: Compressed E00 Read/Write library
7
* Purpose: Functions to read Compressed E00 files and return a stream
8
* of uncompressed lines.
9
* Author: Daniel Morissette, dmorissette@dmsolutions.ca
12
* Revision 1.9 2005/09/17 14:22:05 daniel
13
* Switch to MIT license, update refs to website and email address, and
14
* prepare for 1.0.0 release.
16
* Revision 1.8 1999/02/25 18:45:56 daniel
17
* Now use CPL for Error handling, Memory allocation, and File access
19
* Revision 1.7 1999/01/08 17:39:08 daniel
20
* Added E00ReadCallbackOpen()
22
* Revision 1.6 1998/11/13 16:34:08 daniel
23
* Fixed '\r' problem when reading E00 files from a PC under Unix
25
* Revision 1.5 1998/11/13 15:48:08 daniel
26
* Simplified the decoding of the compression codes for numbers
27
* (use a logical rule instead of going case by case)
29
* Revision 1.4 1998/11/02 18:34:29 daniel
30
* Added E00ErrorReset() calls. Replace "EXP 1" by "EXP 0" on read.
32
* Revision 1.1 1998/10/29 13:26:00 daniel
35
**********************************************************************
36
* Copyright (c) 1998-2005, Daniel Morissette
38
* Permission is hereby granted, free of charge, to any person obtaining a
39
* copy of this software and associated documentation files (the "Software"),
40
* to deal in the Software without restriction, including without limitation
41
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
42
* and/or sell copies of the Software, and to permit persons to whom the
43
* Software is furnished to do so, subject to the following conditions:
45
* The above copyright notice and this permission notice shall be included
46
* in all copies or substantial portions of the Software.
48
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
49
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
50
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
51
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
52
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
53
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
54
* DEALINGS IN THE SOFTWARE.
56
**********************************************************************/
65
/* Forward declarations of private helpers that are used before their
 * definitions further down in this file.
 */
static void _ReadNextSourceLine(E00ReadPtr psInfo);
static const char *_UncompressNextLine(E00ReadPtr psInfo);
68
/**********************************************************************
71
* Given a pre-initialized E00ReadPtr, this function will make sure
72
* that the file is really a E00 file, and also establish if it is
73
* compressed or not... setting the structure members by the same way.
75
* Returns NULL (and destroys the E00ReadPtr) if the file does not
76
* appear to be a valid E00 file.
77
**********************************************************************/
78
static E00ReadPtr _E00ReadTestOpen(E00ReadPtr psInfo)
81
/* Check that the file is in E00 format.
83
_ReadNextSourceLine(psInfo);
84
if (!psInfo->bEOF && strncmp(psInfo->szInBuf, "EXP ", 4) == 0)
86
/* We should be in presence of a valid E00 file...
87
* Is the file compressed or not?
89
* Note: we cannot really rely on the number that follows the EXP to
90
* establish if the file is compressed since we sometimes encounter
91
* uncompressed files that start with a "EXP 1" line!!!
93
* The best test is to read the first non-empty line: if the file is
94
* compressed, the first line of data should be 79 or 80 characters
95
* long and contain several '~' characters.
99
_ReadNextSourceLine(psInfo);
100
}while(!psInfo->bEOF &&
101
(psInfo->szInBuf[0] == '\0' || isspace(psInfo->szInBuf[0])) );
104
(strlen(psInfo->szInBuf)==79 || strlen(psInfo->szInBuf)==80) &&
105
strchr(psInfo->szInBuf, '~') != NULL )
106
psInfo->bIsCompressed = 1;
108
/* Move the Read ptr ready to read at the beginning of the file
110
E00ReadRewind(psInfo);
121
/**********************************************************************
124
* Try to open a E00 file given its filename and return a E00ReadPtr handle.
126
* Returns NULL if the file could not be opened or if it does not
127
* appear to be a valid E00 file.
128
**********************************************************************/
129
E00ReadPtr E00ReadOpen(const char *pszFname)
131
E00ReadPtr psInfo = NULL;
138
fp = VSIFOpen(pszFname, "rt");
141
CPLError(CE_Failure, CPLE_OpenFailed,
142
"Failed to open %s: %s", pszFname, strerror(errno));
146
/* File was succesfully opened, allocate and initialize a
147
* E00ReadPtr handle and check that the file is valid.
149
psInfo = (E00ReadPtr)CPLCalloc(1, sizeof(struct _E00ReadInfo));
153
psInfo = _E00ReadTestOpen(psInfo);
157
CPLError(CE_Failure, CPLE_OpenFailed,
158
"%s is not a valid E00 file.", pszFname);
164
/**********************************************************************
165
* E00ReadCallbackOpen()
167
* This is an alternative to E00ReadOpen() for cases where you want to
168
* do all the file management yourself. You open/close the file yourself
169
* and provide 2 callback functions: to read from the file and rewind the
170
* file pointer. pRefData is your handle on the physical file and can
171
* be whatever you want... it is not used by the library, it will be
172
* passed directly to your 2 callback functions when they are called.
174
* The callback functions must have the following C prototype:
176
* const char *myReadNextLine(void *pRefData);
177
* void myReadRewind(void *pRefData);
179
* myReadNextLine() should return a reference to its own internal
180
* buffer, or NULL if an error happens or EOF is reached.
182
* E00ReadCallbackOpen() returns a E00ReadPtr handle or NULL if the file
183
* does not appear to be a valid E00 file.
184
**********************************************************************/
185
E00ReadPtr E00ReadCallbackOpen(void *pRefData,
186
const char * (*pfnReadNextLine)(void *),
187
void (*pfnReadRewind)(void *))
189
E00ReadPtr psInfo = NULL;
193
/* Make sure we received valid function pointers
195
if (pfnReadNextLine == NULL || pfnReadRewind == NULL)
197
CPLError(CE_Failure, CPLE_IllegalArg,
198
"Invalid function pointers!");
202
/* Allocate and initialize a
203
* E00ReadPtr handle and check that the file is valid.
205
psInfo = (E00ReadPtr)CPLCalloc(1, sizeof(struct _E00ReadInfo));
207
psInfo->pRefData = pRefData;
208
psInfo->pfnReadNextLine = pfnReadNextLine;
209
psInfo->pfnReadRewind = pfnReadRewind;
211
psInfo = _E00ReadTestOpen(psInfo);
215
CPLError(CE_Failure, CPLE_OpenFailed,
216
"This is not a valid E00 file.");
222
/**********************************************************************
225
* Close input file and release any memory used by the E00ReadPtr.
226
**********************************************************************/
227
void E00ReadClose(E00ReadPtr psInfo)
234
VSIFClose(psInfo->fp);
239
/**********************************************************************
242
* Rewind the E00ReadPtr. Allows to start another read pass on the
244
**********************************************************************/
245
void E00ReadRewind(E00ReadPtr psInfo)
249
psInfo->szInBuf[0] = psInfo->szOutBuf[0] = '\0';
250
psInfo->iInBufPtr = 0;
252
psInfo->nInputLineNo = 0;
254
if (psInfo->pfnReadRewind == NULL)
255
VSIRewind(psInfo->fp);
257
psInfo->pfnReadRewind(psInfo->pRefData);
262
/**********************************************************************
265
* Return the next line of input from the E00 file or NULL if we reached EOF.
267
* Returns a reference to an internal buffer whose contents will be valid
268
* only until the next call to this function.
269
**********************************************************************/
270
const char *E00ReadNextLine(E00ReadPtr psInfo)
272
const char *pszLine = NULL;
277
if (psInfo && !psInfo->bEOF)
279
if (!psInfo->bIsCompressed)
281
/* Uncompressed file... return line directly.
283
_ReadNextSourceLine(psInfo);
284
pszLine = psInfo->szInBuf;
286
else if (psInfo->bIsCompressed && psInfo->nInputLineNo == 0)
288
/* Header line in a compressed file... return line
289
* after replacing "EXP 1" with "EXP 0". E00ReadOpen()
290
* has already verified that this line starts with "EXP "
292
_ReadNextSourceLine(psInfo);
293
if ( (pszPtr = strstr(psInfo->szInBuf, " 1")) != NULL)
295
pszLine = psInfo->szInBuf;
299
if (psInfo->nInputLineNo == 1)
301
/* We just read the header line... reload the input buffer
303
_ReadNextSourceLine(psInfo);
306
/* Uncompress the next line of input and return it
308
pszLine = _UncompressNextLine(psInfo);
311
/* If we just reached EOF then make sure we don't add an extra
312
* empty line at the end of the uncompressed oputput.
314
if (psInfo->bEOF && strlen(pszLine) == 0)
321
/**********************************************************************
322
* _ReadNextSourceLine()
324
* Loads the next line from the source file in psInfo.
326
* psInfo->bEOF should be checked after this call.
327
**********************************************************************/
328
static void _ReadNextSourceLine(E00ReadPtr psInfo)
332
psInfo->iInBufPtr = 0;
333
psInfo->szInBuf[0] = '\0';
335
/* Read either using fgets() or psInfo->pfnReadNextLine()
336
* depending on the way the file was opened...
338
if (psInfo->pfnReadNextLine == NULL)
340
if (VSIFGets(psInfo->szInBuf,E00_READ_BUF_SIZE,psInfo->fp) == NULL)
350
pszLine = psInfo->pfnReadNextLine(psInfo->pRefData);
353
strncpy(psInfo->szInBuf, pszLine, E00_READ_BUF_SIZE);
365
/* A new line was succesfully read. Remove trailing '\n' if any.
366
* (Note: For Unix systems, we also have to check for '\r')
369
nLen = strlen(psInfo->szInBuf);
370
while(nLen > 0 && (psInfo->szInBuf[nLen-1] == '\n' ||
371
psInfo->szInBuf[nLen-1] == '\r' ) )
374
psInfo->szInBuf[nLen] = '\0';
377
psInfo->nInputLineNo++;
383
/**********************************************************************
384
* _GetNextSourceChar()
386
* Returns the next char from the source file input buffer... and
387
* reload the input buffer when necessary... this function makes the
388
* whole input file appear as one huge null-terminated string with
389
* no line delimiters.
391
* Will return '\0' when EOF is reached.
392
**********************************************************************/
393
static char _GetNextSourceChar(E00ReadPtr psInfo)
399
if (psInfo->szInBuf[psInfo->iInBufPtr] == '\0')
401
_ReadNextSourceLine(psInfo);
402
c = _GetNextSourceChar(psInfo);
406
c = psInfo->szInBuf[psInfo->iInBufPtr++];
413
/**********************************************************************
416
* Reverse the effect of the previous call to _GetNextSourceChar() by
417
* moving the input buffer pointer back 1 character.
419
* This function can be called only once per call to _GetNextSourceChar()
420
* (i.e. you cannot unget more than one character) otherwise the pointer
421
* could move before the beginning of the input buffer.
422
**********************************************************************/
423
static void _UngetSourceChar(E00ReadPtr psInfo)
425
if (psInfo->iInBufPtr > 0)
429
/* This error can happen only if _UngetSourceChar() is called
430
* twice in a row (which should never happen!).
432
CPLError(CE_Failure, CPLE_AssertionFailed,
433
"UNEXPECTED INTERNAL ERROR: _UngetSourceChar() "
434
"failed while reading line %d.", psInfo->nInputLineNo);
438
/**********************************************************************
439
* _UncompressNextLine()
441
* Uncompress one line of input and return a reference to an internal
442
* buffer containing the uncompressed output.
443
**********************************************************************/
444
static const char *_UncompressNextLine(E00ReadPtr psInfo)
447
int bEOL = 0; /* Set to 1 when End of Line reached */
448
int iOutBufPtr = 0, i, n;
449
int iDecimalPoint, bOddNumDigits, iCurDigit;
451
int bPreviousCodeWasNumeric = 0;
453
while(!bEOL && (c=_GetNextSourceChar(psInfo)) != '\0')
457
/* Normal character... just copy it
459
psInfo->szOutBuf[iOutBufPtr++] = c;
460
bPreviousCodeWasNumeric = 0;
464
/* ========================================================
465
* Found an encoded sequence.
466
* =======================================================*/
467
c = _GetNextSourceChar(psInfo);
469
/* --------------------------------------------------------
470
* Compression level 1: only spaces, '~' and '\n' are encoded
471
* -------------------------------------------------------*/
474
/* "~ " followed by number of spaces
476
c = _GetNextSourceChar(psInfo);
479
psInfo->szOutBuf[iOutBufPtr++] = ' ';
480
bPreviousCodeWasNumeric = 0;
487
bPreviousCodeWasNumeric = 0;
489
else if (bPreviousCodeWasNumeric)
491
/* If the previous code was numeric, then the only valid code
492
* sequences are the ones above: "~ " and "~}". If we end up
493
* here, it is because the number was followed by a '~' but
494
* this '~' was not a code, it only marked the end of a
495
* number that was not followed by any space.
497
* We should simply ignore the '~' and return the character
498
* that follows it directly.
500
psInfo->szOutBuf[iOutBufPtr++] = c;
501
bPreviousCodeWasNumeric = 0;
503
else if (c == '~' || c == '-')
505
/* "~~" and "~-" are simple escape sequences for '~' and '-'
507
psInfo->szOutBuf[iOutBufPtr++] = c;
509
/* --------------------------------------------------------
510
* Compression level 2: numeric values are encoded.
512
* All codes for this level are in the form "~ c0 c1 c2 ... cn"
515
* ~ marks the beginning of a new code sequence
517
* c0 is a single character code defining the format
518
* of the number (decimal position, exponent,
519
* and even or odd number of digits)
521
* c1 c2 ... cn each of these characters represent a pair of
522
* digits of the encoded value with '!' == 00
523
* values 92..99 are encoded on 2 chars that
524
* must be added to each other
525
* (i.e. 92 == }!, 93 == }", ...)
527
* The sequence ends with a ' ' or a '~' character
528
* -------------------------------------------------------*/
529
else if (c >= '!' && c <= 'z')
531
/* The format code defines 3 characteristics of the final number:
532
* - Presence of a decimal point and its position
533
* - Presence of an exponent, and its sign
534
* - Odd or even number of digits
537
iDecimalPoint = n % 15; /* 0 = no decimal point */
538
bOddNumDigits = n / 45; /* 0 = even num.digits, 1 = odd */
542
else if (n % 3 == 2 )
547
/* Decode the c1 c2 ... cn value and apply the format.
548
* Read characters until we encounter a ' ' or a '~'
551
while((c=_GetNextSourceChar(psInfo)) != '\0' &&
552
c != ' ' && c != '~')
555
if (n == 92 && (c=_GetNextSourceChar(psInfo)) != '\0')
558
psInfo->szOutBuf[iOutBufPtr++] = '0' + n/10;
560
if (++iCurDigit == iDecimalPoint)
561
psInfo->szOutBuf[iOutBufPtr++] = '.';
563
psInfo->szOutBuf[iOutBufPtr++] = '0' + n%10;
565
if (++iCurDigit == iDecimalPoint)
566
psInfo->szOutBuf[iOutBufPtr++] = '.';
569
if (c == '~' || c == ' ')
571
bPreviousCodeWasNumeric = 1;
572
_UngetSourceChar(psInfo);
575
/* If odd number of digits, then flush the last one
580
/* Insert the exponent string before the 2 last digits
581
* (we assume the exponent string is 2 chars. long)
587
psInfo->szOutBuf[iOutBufPtr] =
588
psInfo->szOutBuf[iOutBufPtr-2];
589
psInfo->szOutBuf[iOutBufPtr-2] = pszExp[i];
596
/* Unsupported code sequence... this is a possibility
597
* given the fact that this library was written by
598
* reverse-engineering the format!
600
* Send an error to the user and abort.
602
* If this error ever happens, and you are convinced that
603
* the input file is not corrupted, then please report it to
604
* me at dmorissette@dmsolutions.ca, quoting the section of the input
605
* file that produced it, and I'll do my best to add support
606
* for this code sequence.
608
CPLError(CE_Failure, CPLE_NotSupported,
609
"Unexpected code \"~%c\" encountered in line %d.",
610
c, psInfo->nInputLineNo);
612
/* Force the program to abort by simulating a EOF
620
/* E00 lines should NEVER be longer than 80 chars. if we passed
621
* that limit, then the input file is likely corrupt.
625
CPLError(CE_Failure, CPLE_FileIO,
626
"Uncompressed line longer than 80 chars. "
627
"Input file possibly corrupt around line %d.",
628
psInfo->nInputLineNo);
629
/* Force the program to abort by simulating a EOF
637
psInfo->szOutBuf[iOutBufPtr++] = '\0';
639
return psInfo->szOutBuf;