2
/******************************************************************************
3
* MODULE : encoding.cpp
4
* DESCRIPTION: font encodings
5
* COPYRIGHT : (C) 1999 Joris van der Hoeven
6
*******************************************************************************
7
* This software falls under the GNU general public license and comes WITHOUT
8
* ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
9
* If you don't have this file, write to the Free Software Foundation, Inc.,
10
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
11
*------------------------------------------------------------------------------
12
* Encodings are abstract objects that give a semantic meaning to strings,
13
* that is, how to see the string as a sequence of tokens with a well
14
* defined semantics (where a token may require several chars in the string).
15
* This is done by providing methods to convert between the encoding
16
* and a standard 'universal' encoding, in which strings are built
17
* up from tokens, represented by strings between angular brackets < and >.
18
* Of course, these tokens are assumed to have some standard well defined
19
* semantic meaning. For instance, <a> is the token a, <alpha> the token
20
* \alpha in TeX, <'a> an accented a, and <leq> the less-or-equal token.
21
* The methods provided by an encoding are as follows:
22
* valid: checks whether a string is valid in the encoding.
23
* encode: convert from the universal encoding to the actual one.
24
* decode: convert from the actual encoding to the universal one.
25
* token_forward: move one token forward at a given cursor position.
26
* token_backward: move one token backward at a given cursor position.
27
* The methods token_forward and token_backward each return a flag
28
* which is true if an invalid token in the encoding is encountered.
29
* The routine 'token_forward' is used by default in order to
30
* check the validity of the entire string in the encoding.
31
* An empty string is returned in the case of invalid conversions.
32
******************************************************************************/
34
#include "encoding.hpp"
35
#include "hashset.hpp"
37
RESOURCE_CODE(encoding);
40
// Default validity check for a string in this encoding: the answer is false
// as soon as token_forward flags a token (token_forward yields true when it
// meets an invalid token).
// NOTE(review): the return type, the declaration of i and the surrounding
// loop are on lines not visible in this excerpt.
encoding_rep::valid (string s) {
43
if (token_forward (s, i)) return false;
47
/******************************************************************************
48
* The universal encoding
49
******************************************************************************/
51
// Representation of the universal encoding, in which strings are built from
// tokens written between '<' and '>'.
// NOTE(review): the constructor declaration and the closing of the struct
// are on lines not visible in this excerpt.
struct univ_encoding_rep: encoding_rep {
53
string encode (string);   // universal -> this encoding; "" when invalid
54
string decode (string);   // this encoding -> universal; "" when invalid
55
bool token_forward (string s, int& pos);    // true when an invalid token is met
56
bool token_backward (string s, int& pos);   // true when an invalid token is met
59
// Builds the universal encoding; the encoding_rep base receives the
// name "universal".
univ_encoding_rep::univ_encoding_rep ()
  : encoding_rep ("universal")
{
}
62
// Encoding step of the universal encoding. A string that is not valid
// yields the empty string.
// NOTE(review): the return type and the rest of the body are on lines not
// visible in this excerpt.
univ_encoding_rep::encode (string s) {
63
if (!valid (s)) return string("");
68
// Decoding step of the universal encoding. A string that is not valid
// yields the empty string.
// NOTE(review): the return type and the rest of the body are on lines not
// visible in this excerpt.
univ_encoding_rep::decode (string s) {
69
if (!valid (s)) return string("");
74
// Moves pos forward over one '<'...'>' token of s. The result is true when
// the token is invalid and false otherwise.
// NOTE(review): several lines of the body (including the start of the
// do-loop and the final return) are not visible in this excerpt.
univ_encoding_rep::token_forward (string s, int& pos) {
75
if (pos==N(s)) return false;    // already at the end: not reported as invalid
76
if (s[pos]!='<') return true;   // a token has to open with '<'
77
// Three-character tokens "<<>" and "<>>" are recognised separately.
if ((pos<=N(s)-3) && ((s[pos+1]=='<')||(s[pos+1]=='>')) && (s[pos+2]=='>')) {
83
if (pos==N(s)) return true;     // end of string reached before a closing '>'
84
if (s[pos]=='<') return true;   // a second '<' inside the token is invalid
85
} while (s[pos]!='>');          // scan until the closing '>'
91
// Moves pos backward over one '<'...'>' token of s. The result is true when
// the token is invalid and false otherwise.
// NOTE(review): several lines of the body are not visible in this excerpt;
// presumably pos is decremented before s[pos] is inspected -- confirm.
univ_encoding_rep::token_backward (string s, int& pos) {
92
if (pos==0) return false;       // already at the start: not reported as invalid
94
if (s[pos]!='>') return true;   // a token has to close with '>'
95
// Three-character tokens "<<>" and "<>>" are recognised separately.
if ((pos>=2) && ((s[pos-1]=='<')||(s[pos-1]=='>')) && (s[pos-2]=='<')) {
100
if (pos==0) return true;        // start of string reached before an opening '<'
102
if (s[pos]=='>') return true;   // a second '>' inside the token is invalid
103
} while (s[pos]!='<');          // scan back to the opening '<'
107
/******************************************************************************
109
******************************************************************************/
111
// An encoding obtained from another encoding by keeping only the tokens
// listed in valid_tokens.
// NOTE(review): the declaration of the wrapped encoding member (used as
// `enc` by the methods) and the closing of the struct are not visible here.
struct sub_encoding_rep: encoding_rep {
113
hashset<string> valid_tokens;   // tokens accepted by this sub-encoding
115
sub_encoding_rep (string name, encoding& enc2, hashset<string>& S);
116
string encode (string);   // "" when the result is not valid here
117
string decode (string);   // "" when the input is not valid here
118
bool token_forward (string s, int& pos);    // true when an invalid token is met
119
bool token_backward (string s, int& pos);   // true when an invalid token is met
122
// Stores the name in the encoding_rep base, the wrapped encoding in enc and
// the set of accepted tokens in valid_tokens.
// NOTE(review): the line declaring the third parameter (S) is not visible
// in this excerpt.
sub_encoding_rep::sub_encoding_rep (string name, encoding& enc2,
124
encoding_rep (name), enc (enc2), valid_tokens (S) {}
127
// Encodes s with the wrapped encoding, then rejects the result (empty
// string) when it is not valid in this sub-encoding.
// NOTE(review): the return type and the end of the body are not visible in
// this excerpt.
sub_encoding_rep::encode (string s) {
128
string r=enc->encode (s);            // delegate the conversion itself
129
if (!valid (r)) return string("");   // contains a token outside the subset
134
// Decodes s through the wrapped encoding, but only after s has been
// checked against this sub-encoding; an invalid s yields the empty string.
// NOTE(review): the return type and closing brace are not visible in this
// excerpt.
sub_encoding_rep::decode (string s) {
135
if (!valid (s)) return string("");
136
return enc->decode (s);
140
// Advances pos over one token using the wrapped encoding. The token is
// invalid (result true) when the wrapped encoding flags it, or when it is
// not a member of valid_tokens.
// NOTE(review): `start` is declared on a line not visible here; presumably
// it holds the value of pos on entry -- confirm.
sub_encoding_rep::token_forward (string s, int& pos) {
142
if (enc->token_forward (s, pos)) return true;
143
return !valid_tokens->contains (s (start, pos));
147
// Moves pos back over one token using the wrapped encoding. The token is
// invalid (result true) when the wrapped encoding flags it, or when it is
// not a member of valid_tokens.
// NOTE(review): `end` is declared on a line not visible here; presumably
// it holds the value of pos on entry -- confirm.
sub_encoding_rep::token_backward (string s, int& pos) {
149
if (enc->token_backward (s, pos)) return true;
150
return !valid_tokens->contains (s (pos, end));
153
/******************************************************************************
155
******************************************************************************/
157
// An encoding whose tokens are single characters, filtered by a
// caller-supplied predicate.
// NOTE(review): the closing of the struct is on a line not visible here.
struct ascii_encoding_rep: encoding_rep {
158
bool (*in_range) (char);   // predicate: does this character belong to the encoding?
159
ascii_encoding_rep (string name, bool (*in_range) (char));
160
string encode (string);   // "" when the input cannot be converted
161
string decode (string);   // "" when a character is out of range
162
bool token_forward (string s, int& pos);    // true when the character is out of range
163
bool token_backward (string s, int& pos);   // true when the character is out of range
166
// Builds a single-character encoding called `name`. A character belongs to
// the encoding exactly when the predicate in_range2 yields true for it.
ascii_encoding_rep::ascii_encoding_rep (string name, bool (*in_range2) (char))
  : encoding_rep (name),
    in_range (in_range2)
{
}
170
// Converts a universal string made of three-character groups into this
// encoding. The input length must be a multiple of three; each group must
// carry an in-range character in the middle and end with '>'. Any violation
// yields the empty string.
// NOTE(review): the loop advances i by 3 while the subscripts also multiply
// i by 3 (3*i+1, 3*i+2). For N(s) >= 6 these subscripts fall outside s, so
// one of the two scalings looks unintended. Confirm against the lines of
// the body that are not visible in this excerpt before changing it.
ascii_encoding_rep::encode (string s) {
171
if ((N(s)%3)!=0) return string ("");
174
for (i=0; i<N(s); i+=3) {
176
(!in_range (s[3*i+1])) ||
177
(s[3*i+2]!='>')) return string ("");
184
// Converts a string of this encoding to the universal one. Every character
// is checked with in_range; the first character out of range makes the
// whole conversion fail with the empty string.
// NOTE(review): the construction of the result is on lines not visible in
// this excerpt.
ascii_encoding_rep::decode (string s) {
187
for (i=0; i<N(s); i++) {
188
if (!in_range (s[i])) return string ("");
197
// Steps pos over one character of s. The result is true when that character
// is rejected by in_range; at the end of s nothing is consumed and the
// result is false.
ascii_encoding_rep::token_forward (string s, int& pos) {
198
if (pos==N(s)) return false;
199
return !in_range (s[pos++]);   // test the character at pos, then advance
203
// Steps pos back over one character of s. The result is true when that
// character is rejected by in_range; at the start of s nothing is consumed
// and the result is false.
ascii_encoding_rep::token_backward (string s, int& pos) {
204
if (pos==0) return false;
205
return !in_range (s[--pos]);   // retreat first, then test the character
208
/******************************************************************************
210
******************************************************************************/
212
// An encoding that combines two encodings: the first one is tried first and
// the second one serves as a fallback.
// NOTE(review): the declaration of the members enc1 and enc2 and the
// closing of the struct are on lines not visible in this excerpt.
struct join_encoding_rep: encoding_rep {
214
join_encoding_rep (string name, encoding enc1, encoding enc2);
215
string encode (string);   // "" when neither encoding can encode a token
216
string decode (string);   // "" when neither encoding accepts a token
217
bool token_forward (string s, int& pos);    // true when an invalid token is met
218
bool token_backward (string s, int& pos);   // true when an invalid token is met
221
// Builds the combination of two encodings: enc1b is stored as the first
// choice (enc1) and enc2b as the fallback (enc2).
join_encoding_rep::join_encoding_rep (string name,
                                      encoding enc1b, encoding enc2b)
  : encoding_rep (name),
    enc1 (enc1b),
    enc2 (enc2b)
{
}
226
// Encodes a universal string token by token. Each token is cut out with the
// universal encoding and handed to enc1; when enc1 produces nothing, enc2 is
// tried. The whole conversion fails with the empty string when a token is
// malformed or when neither encoding can encode it.
// NOTE(review): the return type, the declarations of r, pos and start, the
// loop header and the closing lines are not visible in this excerpt and are
// left untouched.
join_encoding_rep::encode (string s) {
231
if (universal_enc->token_forward (s, pos)) return string ("");
232
string ss = s (start, pos);      // the token just skipped
233
string ss1= enc1->encode (ss);
234
if (N(ss1)!=0) r << ss1;
236
string ss2= enc2->encode (ss);
237
// Fix: test the result of enc2 (ss2). The original re-tested ss1 here, so
// a successful enc2 encoding was never appended.
if (N(ss2)!=0) r << ss2;
238
else return string ("");
245
// Decodes s token by token: a token accepted by enc1 is decoded with enc1,
// otherwise enc2 is tried; when both flag the token the result is the
// empty string.
// NOTE(review): the declarations of r, pos and start, the loop header and
// the statements between the visible lines (presumably resetting pos to
// start before enc2 is tried) are not visible in this excerpt.
join_encoding_rep::decode (string s) {
250
if (enc1->token_forward (s, pos)) {   // enc1 flags the next token as invalid
252
if (enc2->token_forward (s, pos)) return string ("");   // so does enc2
254
string ss = s (start, pos);
255
r << enc2->decode (ss);               // token belongs to enc2
259
string ss = s (start, pos);
260
r << enc1->decode (ss);               // token belongs to enc1
267
// Moves pos forward over one token: enc1 is asked first, and enc2 decides
// when enc1 flags the token as invalid.
// NOTE(review): the lines around the visible ones (presumably saving and
// restoring pos, and the return for the enc1 success case) are not visible.
join_encoding_rep::token_forward (string s, int& pos) {
269
if (enc1->token_forward (s, pos)) {
271
return enc2->token_forward (s, pos);
277
// Moves pos backward over one token: enc1 is asked first, and enc2 decides
// when enc1 flags the token as invalid.
// NOTE(review): the lines around the visible ones (presumably saving and
// restoring pos, and the return for the enc1 success case) are not visible.
join_encoding_rep::token_backward (string s, int& pos) {
279
if (enc1->token_backward (s, pos)) {
281
return enc2->token_backward (s, pos);
287
// Builds the combination of enc1 and enc2. Its name is the two resource
// names separated by "|".
// NOTE(review): `make` is defined outside this file; presumably it reuses an
// existing resource with that name and otherwise adopts the new
// representation -- confirm. The return type line is not visible here.
join (encoding enc1, encoding enc2) {
288
string name= enc1->res_name * "|" * enc2->res_name;
289
return make (encoding, name, new join_encoding_rep (name, enc1, enc2));
292
/******************************************************************************
293
* some important standard encodings
294
******************************************************************************/
296
// True for the characters allowed in a number: decimal digits, '.' and ','.
static bool char_is_numeric (char c) {
  if (c >= '0' && c <= '9') return true;
  return c == '.' || c == ',';
}
298
// True for the upper-case ASCII letters 'A' through 'Z'.
static bool char_is_capital (char c) {
  const bool below = c < 'A';
  const bool above = c > 'Z';
  return !(below || above);
}
300
// True for ASCII letters, upper or lower case.
static bool char_is_alpha (char c) {
  if (c >= 'A' && c <= 'Z') return true;
  return c >= 'a' && c <= 'z';
}
302
// Character predicate used for the "alphanum" encoding. The visible part of
// the expression accepts '-', decimal digits, '.', ',' and ASCII letters.
// NOTE(review): the first line of the expression (presumably just `return`)
// is not visible in this excerpt.
static bool char_is_alpha_num (char c) {
304
(c=='-') || ((c>='0') && (c<='9')) || (c=='.') || (c==',') ||
305
((c>='a') && (c<='z')) || ((c>='A') && (c<='Z')); }
306
// Character predicate for the "always" encoding. The cast to void only marks
// the parameter as deliberately unused.
// NOTE(review): the return statement is on a line not visible here;
// presumably it returns true for every character -- confirm.
static bool always_true (char c) { (void) c;
308
// True for every character except the token delimiters '<' and '>'.
// Uses the logical && rather than the bitwise & of the original; the result
// is the same for these side-effect-free comparisons.
static bool char_not_less_gtr (char c) {
  return (c != '<') && (c != '>');
}
311
// Names of the two catch-all single-character encodings defined below.
static string ALW ("always");
312
static string AAL ("almost_always");
313
// Global encoding instances. The order of these definitions matters: the
// single-character encodings use ALW/AAL, and math_enc is built from
// universal_enc and almost_always_enc, so each must follow what it uses.
encoding universal_enc= new univ_encoding_rep ();
314
encoding always_enc= new ascii_encoding_rep (ALW, always_true);
315
// Every character except '<' and '>' is accepted.
encoding almost_always_enc= new ascii_encoding_rep (AAL, char_not_less_gtr);
316
// Universal tokens first, plain characters as the fallback.
encoding math_enc= join (universal_enc, almost_always_enc);
320
// Yields the "num" encoding: a single-character encoding filtered by
// char_is_numeric.
// NOTE(review): the enclosing function header is not visible in this excerpt.
return make (encoding, "num",
321
new ascii_encoding_rep ("num", char_is_numeric));
326
// Yields the "capital" encoding: a single-character encoding filtered by
// char_is_capital.
// NOTE(review): the enclosing function header is not visible in this excerpt.
return make (encoding, "capital",
327
new ascii_encoding_rep ("capital", char_is_capital));
332
// Yields the "alpha" encoding: a single-character encoding filtered by
// char_is_alpha.
// NOTE(review): the enclosing function header is not visible in this excerpt.
return make (encoding, "alpha",
333
new ascii_encoding_rep ("alpha", char_is_alpha));
338
// Yields the "alphanum" encoding: a single-character encoding filtered by
// char_is_alpha_num.
// Fix: the key handed to make() was "alpha" although the representation is
// named "alphanum". The key now matches the representation's name, as in the
// sibling factories for "num", "capital" and "alpha".
// NOTE(review): `make` is defined outside this file; presumably it looks the
// resource up by this key, in which case the old key could hand back the
// "alpha" encoding instead -- confirm. The enclosing function header is not
// visible in this excerpt.
return make (encoding, "alphanum",
339
new ascii_encoding_rep ("alphanum", char_is_alpha_num));