2
PROJECT: Medline database.
4
FILES: cspatch.c, cstopatch.c, patchtoiso.c (this one), cspatch.h
6
This file contains support procedures for translating ASCII strings
7
with their associated ISO patch strings into the richer ISO-8859
8
character set. The ISO-8859 character set is a superset of the
9
ASCII character set which includes many accented international characters.
10
The ISO patch instructions are generated at the time the text is
11
converted from the Medline EBCDIC character set into ASCII before
12
loading the text into the database. In addition to describing how
13
to patch the ASCII text to produce the ISO character set, it also
14
contains data describing EBCDIC characters which could not be
15
represented in the ISO-8859 character set. These can be used in
16
the future if character sets become available which can be used
17
to support these untranslated characters.
19
Work started: 29 June 1991, Rand Huntzinger
20
Original version completed: 24 July 1991, Rand S. Huntzinger
23
* RCS Modification History:
24
* $Log: patchtoiso.c,v $
25
* Revision 6.0 1997/08/25 18:36:53 madden
26
* Revision changed to 6.0
28
* Revision 1.2 1995/05/17 17:55:49 epstein
29
* add RCS log revision history
35
#include "patchtoiso.h"
39
ApplyIsoPatches Translate an ASCII string into ISO-8859
41
This procedure takes an ASCII input string and the ASCII->ISO-8859
42
patch string associated with that ASCII input string and converts
43
the text to the ISO character set. It ignores the ISO patch codes
44
which indicate untranslated EBCDIC characters during the EBCDIC
49
in A pointer to the ASCII input string.
51
in_size The number of characters in the ASCII
52
input string. [We don't assume NULL
55
out A pointer to the buffer to receive
56
the ISO-8859 representation of the
57
text created by this procedure.
59
out_size The size of the output buffer.
61
patches A pointer to the ASCII->ISO patch
62
string which contains the conversion
67
The number of errors noted in the conversion. Errors include
68
buffer overflows and invalid patch instructions.
73
* (charin,intin_size,charout,intout_size,unsignedcharpatches)
75
ApplyIsoPatches (char *in, int in_size, char *out, int out_size, unsigned char *patches)
77
int ii, oi, pi; /* in, out and patch indices */
79
static unsigned char abort[] = { POP_STOP_CODE };
82
for(ii = oi = pi = 0; running; pi++) {
83
register int code = (int) patches[pi];
84
register int base_code;
85
register int advance = 0; /* Set the character pointer */
86
register int skip = 0; /* # input chars to skip */
87
register int install = 0; /* # patch chars to copy in */
89
/* Decode the basic opcode */
91
if( (base_code = code & POP_BASIC_MASK) == POP_SPECIAL_GRP )
92
if( (base_code = code & POP_SPEC_MASK) == POP_MISC_GRP )
93
if( (base_code = code & POP_MISC_MASK) == POP_SIMPLE_GRP )
96
/* Process the opcode */
100
skip = POP_PATCH_DELETE( code );
101
install = POP_PATCH_INSERT( code );
103
case POP_OFFSET1_CODE:
104
advance = code & POP_BASIC_ARGMASK;
106
case POP_OFFSET2_CODE:
107
advance = code & (POP_BASIC_ARGMASK << 8) | patches[++pi];
110
break; /* Not implemented */
112
case POP_INVALID_CODE:
114
/* These are various types of uninterpretable codes. They
115
are recorded for the future, but are ignored on output. */
116
pi += code & POP_MISC_ARGMASK;
119
advance = in_size - ii; /* Copy over rest of string */
122
default: /* Anything else is an error! */
123
LogMessage( CHARSET_ERROR, "patchtoiso",
124
"Invalid ASCII->ISO patch instruction [%02x]", code );
125
advance = in_size - ii; /* Copy over rest of string */
126
patches = abort; /* Terminate patching */
127
errors++; /* Mark an error */
130
/* Check for buffer overflows */
132
if( (ii + advance + skip) > in_size ) {
133
LogMessage( CHARSET_ERROR, "patchtoiso",
134
"Patch table input offset (%d) outside input string [%-.16s%s]",
135
ii + advance + skip, in, (in_size < 16) ? "" : "..." );
137
/* Adjust pointers to deal with overflow */
139
if( (ii + advance) > in_size ) {
140
advance = in_size - ii;
143
if( (ii + advance + skip) > in_size )
144
skip = in_size - ii - advance;
145
patches = abort; /* No more patches */
146
errors++; /* Mark an error */
148
if( (oi + advance + install) > out_size ) {
149
LogMessage( CHARSET_ERROR, "patchtoiso",
150
"ISO translation of \"%-.16s%s\" truncated to %d characters",
151
in, (in_size < 16) ? "" : "..." );
153
/* Adjust pointers to deal with overflow */
155
if( (oi + advance) > out_size ) {
156
advance = out_size - oi;
159
if( (oi + advance + install) > out_size )
160
install = out_size - oi - advance;
161
patches = abort; /* No more patches */
162
errors++; /* Mark an error */
165
/* Advance to the proper position */
167
while( advance-- > 0 )
168
out[oi++] = in[ii++];
171
/* Install patch characters */
173
while( install-- > 0 )
174
out[oi++] = patches[++pi];
177
/* Terminate the output string */
179
out[oi++] = (char) 0;