2
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
4
* This program is free software; you can redistribute it and/or
5
* modify it under the terms of the GNU General Public License as
6
* published by the Free Software Foundation; version 2 of the
9
* This program is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
* GNU General Public License for more details.
14
* You should have received a copy of the GNU General Public License
15
* along with this program; if not, write to the Free Software
16
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
19
#ifndef _HTML_ENTITIES_H_
20
#define _HTML_ENTITIES_H_
27
static HTMLEntity XMLEntities[] =
36
static HTMLEntity HTMLEntities[] =
43
{"nbsp", 160}, // no-break space = non-breaking space, U+00A0 ISOnum
44
{"iexcl", 161}, // inverted exclamation mark, U+00A1 ISOnum
45
{"cent", 162}, // cent sign, U+00A2 ISOnum
46
{"pound", 163}, // pound sign, U+00A3 ISOnum
47
{"curren", 164}, // currency sign, U+00A4 ISOnum
48
{"yen", 165}, // yen sign = yuan sign, U+00A5 ISOnum
49
{"brvbar", 166}, // broken bar = broken vertical bar, U+00A6 ISOnum
50
{"sect", 167}, // section sign, U+00A7 ISOnum
51
{"uml", 168}, // diaeresis = spacing diaeresis, U+00A8 ISOdia
52
{"copy", 169}, // copyright sign, U+00A9 ISOnum
53
{"ordf", 170}, // feminine ordinal indicator, U+00AA ISOnum
54
{"laquo", 171}, // left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
55
{"not", 172}, // not sign, U+00AC ISOnum
56
{"shy", 173}, // soft hyphen = discretionary hyphen, U+00AD ISOnum
57
{"reg", 174}, // registered sign = registered trade mark sign, U+00AE ISOnum
58
{"macr", 175}, // macron = spacing macron = overline = APL overbar, U+00AF ISOdia
59
{"deg", 176}, // degree sign, U+00B0 ISOnum
60
{"plusmn", 177}, // plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
61
{"sup2", 178}, // superscript two = superscript digit two = squared, U+00B2 ISOnum
62
{"sup3", 179}, // superscript three = superscript digit three = cubed, U+00B3 ISOnum
63
{"acute", 180}, // acute accent = spacing acute, U+00B4 ISOdia
64
{"micro", 181}, // micro sign, U+00B5 ISOnum
65
{"para", 182}, // pilcrow sign = paragraph sign, U+00B6 ISOnum
66
{"middot", 183}, // middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
67
{"cedil", 184}, // cedilla = spacing cedilla, U+00B8 ISOdia
68
{"sup1", 185}, // superscript one = superscript digit one, U+00B9 ISOnum
69
{"ordm", 186}, // masculine ordinal indicator, U+00BA ISOnum
70
{"raquo", 187}, // right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
71
{"frac14", 188}, // vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
72
{"frac12", 189}, // vulgar fraction one half = fraction one half, U+00BD ISOnum
73
{"frac34", 190}, // vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
74
{"iquest", 191}, // inverted question mark = turned question mark, U+00BF ISOnum
75
{"Agrave", 192}, // latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
76
{"Aacute", 193}, // latin capital letter A with acute, U+00C1 ISOlat1
77
{"Acirc", 194}, // latin capital letter A with circumflex, U+00C2 ISOlat1
78
{"Atilde", 195}, // latin capital letter A with tilde, U+00C3 ISOlat1
79
{"Auml", 196}, // latin capital letter A with diaeresis, U+00C4 ISOlat1
80
{"Aring", 197}, // latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
81
{"AElig", 198}, // latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
82
{"Ccedil", 199}, // latin capital letter C with cedilla, U+00C7 ISOlat1
83
{"Egrave", 200}, // latin capital letter E with grave, U+00C8 ISOlat1
84
{"Eacute", 201}, // latin capital letter E with acute, U+00C9 ISOlat1
85
{"Ecirc", 202}, // latin capital letter E with circumflex, U+00CA ISOlat1
86
{"Euml", 203}, // latin capital letter E with diaeresis, U+00CB ISOlat1
87
{"Igrave", 204}, // latin capital letter I with grave, U+00CC ISOlat1
88
{"Iacute", 205}, // latin capital letter I with acute, U+00CD ISOlat1
89
{"Icirc", 206}, // latin capital letter I with circumflex, U+00CE ISOlat1
90
{"Iuml", 207}, // latin capital letter I with diaeresis, U+00CF ISOlat1
91
{"ETH", 208}, // latin capital letter ETH, U+00D0 ISOlat1
92
{"Ntilde", 209}, // latin capital letter N with tilde, U+00D1 ISOlat1
93
{"Ograve", 210}, // latin capital letter O with grave, U+00D2 ISOlat1
94
{"Oacute", 211}, // latin capital letter O with acute, U+00D3 ISOlat1
95
{"Ocirc", 212}, // latin capital letter O with circumflex, U+00D4 ISOlat1
96
{"Otilde", 213}, // latin capital letter O with tilde, U+00D5 ISOlat1
97
{"Ouml", 214}, // latin capital letter O with diaeresis, U+00D6 ISOlat1
98
{"times", 215}, // multiplication sign, U+00D7 ISOnum
99
{"Oslash", 216}, // latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
100
{"Ugrave", 217}, // latin capital letter U with grave, U+00D9 ISOlat1
101
{"Uacute", 218}, // latin capital letter U with acute, U+00DA ISOlat1
102
{"Ucirc", 219}, // latin capital letter U with circumflex, U+00DB ISOlat1
103
{"Uuml", 220}, // latin capital letter U with diaeresis, U+00DC ISOlat1
104
{"Yacute", 221}, // latin capital letter Y with acute, U+00DD ISOlat1
105
{"THORN", 222}, // latin capital letter THORN, U+00DE ISOlat1
106
{"szlig", 223}, // latin small letter sharp s = ess-zed, U+00DF ISOlat1
107
{"agrave", 224}, // latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
108
{"aacute", 225}, // latin small letter a with acute, U+00E1 ISOlat1
109
{"acirc", 226}, // latin small letter a with circumflex, U+00E2 ISOlat1
110
{"atilde", 227}, // latin small letter a with tilde, U+00E3 ISOlat1
111
{"auml", 228}, // latin small letter a with diaeresis, U+00E4 ISOlat1
112
{"aring", 229}, // latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
113
{"aelig", 230}, // latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
114
{"ccedil", 231}, // latin small letter c with cedilla, U+00E7 ISOlat1
115
{"egrave", 232}, // latin small letter e with grave, U+00E8 ISOlat1
116
{"eacute", 233}, // latin small letter e with acute, U+00E9 ISOlat1
117
{"ecirc", 234}, // latin small letter e with circumflex, U+00EA ISOlat1
118
{"euml", 235}, // latin small letter e with diaeresis, U+00EB ISOlat1
119
{"igrave", 236}, // latin small letter i with grave, U+00EC ISOlat1
120
{"iacute", 237}, // latin small letter i with acute, U+00ED ISOlat1
121
{"icirc", 238}, // latin small letter i with circumflex, U+00EE ISOlat1
122
{"iuml", 239}, // latin small letter i with diaeresis, U+00EF ISOlat1
123
{"eth", 240}, // latin small letter eth, U+00F0 ISOlat1
124
{"ntilde", 241}, // latin small letter n with tilde, U+00F1 ISOlat1
125
{"ograve", 242}, // latin small letter o with grave, U+00F2 ISOlat1
126
{"oacute", 243}, // latin small letter o with acute, U+00F3 ISOlat1
127
{"ocirc", 244}, // latin small letter o with circumflex, U+00F4 ISOlat1
128
{"otilde", 245}, // latin small letter o with tilde, U+00F5 ISOlat1
129
{"ouml", 246}, // latin small letter o with diaeresis, U+00F6 ISOlat1
130
{"divide", 247}, // division sign, U+00F7 ISOnum
131
{"oslash", 248}, // latin small letter o with stroke = latin small letter o slash, U+00F8 ISOlat1
132
{"ugrave", 249}, // latin small letter u with grave, U+00F9 ISOlat1
133
{"uacute", 250}, // latin small letter u with acute, U+00FA ISOlat1
134
{"ucirc", 251}, // latin small letter u with circumflex, U+00FB ISOlat1
135
{"uuml", 252}, // latin small letter u with diaeresis, U+00FC ISOlat1
136
{"yacute", 253}, // latin small letter y with acute, U+00FD ISOlat1
137
{"thorn", 254}, // latin small letter thorn, U+00FE ISOlat1
138
{"yuml", 255}, // latin small letter y with diaeresis, U+00FF ISOlat1
139
// symbols, mathematical symbols, and Greek letters
141
{"fnof", 402}, // latin small f with hook = function = florin, U+0192 ISOtech
143
{"Alpha", 913}, // greek capital letter alpha, U+0391
144
{"Beta", 914}, // greek capital letter beta, U+0392
145
{"Gamma", 915}, // greek capital letter gamma, U+0393 ISOgrk3
146
{"Delta", 916}, // greek capital letter delta, U+0394 ISOgrk3
147
{"Epsilon", 917}, // greek capital letter epsilon, U+0395
148
{"Zeta", 918}, // greek capital letter zeta, U+0396
149
{"Eta", 919}, // greek capital letter eta, U+0397
150
{"Theta", 920}, // greek capital letter theta, U+0398 ISOgrk3
151
{"Iota", 921}, // greek capital letter iota, U+0399
152
{"Kappa", 922}, // greek capital letter kappa, U+039A
153
{"Lambda", 923}, // greek capital letter lambda, U+039B ISOgrk3
154
{"Mu", 924}, // greek capital letter mu, U+039C
155
{"Nu", 925}, // greek capital letter nu, U+039D
156
{"Xi", 926}, // greek capital letter xi, U+039E ISOgrk3
157
{"Omicron", 927}, // greek capital letter omicron, U+039F
158
{"Pi", 928}, // greek capital letter pi, U+03A0 ISOgrk3
159
{"Rho", 929}, // greek capital letter rho, U+03A1
160
{"Sigma", 931}, // greek capital letter sigma, U+03A3 ISOgrk3,
161
// there is no Sigmaf, and no U+03A2 character either
162
{"Tau", 932}, // greek capital letter tau, U+03A4
163
{"Upsilon", 933}, // greek capital letter upsilon, U+03A5 ISOgrk3
164
{"Phi", 934}, // greek capital letter phi, U+03A6 ISOgrk3
165
{"Chi", 935}, // greek capital letter chi, U+03A7
166
{"Psi", 936}, // greek capital letter psi, U+03A8 ISOgrk3
167
{"Omega", 937}, // greek capital letter omega, U+03A9 ISOgrk3
168
{"alpha", 945}, // greek small letter alpha, U+03B1 ISOgrk3
169
{"beta", 946}, // greek small letter beta, U+03B2 ISOgrk3
170
{"gamma", 947}, // greek small letter gamma, U+03B3 ISOgrk3
171
{"delta", 948}, // greek small letter delta, U+03B4 ISOgrk3
172
{"epsilon", 949}, // greek small letter epsilon, U+03B5 ISOgrk3
173
{"zeta", 950}, // greek small letter zeta, U+03B6 ISOgrk3
174
{"eta", 951}, // greek small letter eta, U+03B7 ISOgrk3
175
{"theta", 952}, // greek small letter theta, U+03B8 ISOgrk3
176
{"iota", 953}, // greek small letter iota, U+03B9 ISOgrk3
177
{"kappa", 954}, // greek small letter kappa, U+03BA ISOgrk3
178
{"lambda", 955}, // greek small letter lambda, U+03BB ISOgrk3
179
{"mu", 956}, // greek small letter mu, U+03BC ISOgrk3
180
{"nu", 957}, // greek small letter nu, U+03BD ISOgrk3
181
{"xi", 958}, // greek small letter xi, U+03BE ISOgrk3
182
{"omicron", 959}, // greek small letter omicron, U+03BF NEW
183
{"pi", 960}, // greek small letter pi, U+03C0 ISOgrk3
184
{"rho", 961}, // greek small letter rho, U+03C1 ISOgrk3
185
{"sigmaf", 962}, // greek small letter final sigma, U+03C2 ISOgrk3
186
{"sigma", 963}, // greek small letter sigma, U+03C3 ISOgrk3
187
{"tau", 964}, // greek small letter tau, U+03C4 ISOgrk3
188
{"upsilon", 965}, // greek small letter upsilon, U+03C5 ISOgrk3
189
{"phi", 966}, // greek small letter phi, U+03C6 ISOgrk3
190
{"chi", 967}, // greek small letter chi, U+03C7 ISOgrk3
191
{"psi", 968}, // greek small letter psi, U+03C8 ISOgrk3
192
{"omega", 969}, // greek small letter omega, U+03C9 ISOgrk3
193
{"thetasym", 977}, // greek small letter theta symbol, U+03D1 NEW
194
{"upsih", 978}, // greek upsilon with hook symbol, U+03D2 NEW
195
{"piv", 982}, // greek pi symbol, U+03D6 ISOgrk3
196
// General Punctuation
197
{"bull", 8226}, // bullet = black small circle, U+2022 ISOpub,
198
// bullet is NOT the same as bullet operator, U+2219
199
{"hellip", 8230}, // horizontal ellipsis = three dot leader, U+2026 ISOpub
200
{"prime", 8242}, // prime = minutes = feet, U+2032 ISOtech
201
{"Prime", 8243}, // double prime = seconds = inches, U+2033 ISOtech
202
{"oline", 8254}, // overline = spacing overscore, U+203E NEW
203
{"frasl", 8260}, // fraction slash, U+2044 NEW
204
// Letterlike Symbols
205
{"weierp", 8472}, // script capital P = power set = Weierstrass p, U+2118 ISOamso
206
{"image", 8465}, // blackletter capital I = imaginary part, U+2111 ISOamso
207
{"real", 8476}, // blackletter capital R = real part symbol, U+211C ISOamso
208
{"trade", 8482}, // trade mark sign, U+2122 ISOnum
209
{"alefsym", 8501}, // alef symbol = first transfinite cardinal, U+2135 NEW
210
// alef symbol is NOT the same as hebrew letter alef, U+05D0 although the same
211
// glyph could be used to depict both characters
213
{"larr", 8592}, // leftwards arrow, U+2190 ISOnum
214
{"uarr", 8593}, // upwards arrow, U+2191 ISOnum
215
{"rarr", 8594}, // rightwards arrow, U+2192 ISOnum
216
{"darr", 8595}, // downwards arrow, U+2193 ISOnum
217
{"harr", 8596}, // left right arrow, U+2194 ISOamsa
218
{"crarr", 8629}, // downwards arrow with corner leftwards = carriage return, U+21B5 NEW
219
{"lArr", 8656}, // leftwards double arrow, U+21D0 ISOtech
220
// ISO 10646 does not say that lArr is the same as the 'is implied by' arrow but
221
// also does not have any other character for that function. So ? lArr can be used
222
// for 'is implied by' as ISOtech suggests
223
{"uArr", 8657}, // upwards double arrow, U+21D1 ISOamsa
224
{"rArr", 8658}, // rightwards double arrow, U+21D2 ISOtech
225
// ISO 10646 does not say this is the 'implies' character but does not have another
226
// character with this function so ? rArr can be used for 'implies' as ISOtech suggests
227
{"dArr", 8659}, // downwards double arrow, U+21D3 ISOamsa
228
{"hArr", 8660}, // left right double arrow, U+21D4 ISOamsa
229
// Mathematical Operators
230
{"forall", 8704}, // for all, U+2200 ISOtech
231
{"part", 8706}, // partial differential, U+2202 ISOtech
232
{"exist", 8707}, // there exists, U+2203 ISOtech
233
{"empty", 8709}, // empty set = null set = diameter, U+2205 ISOamso
234
{"nabla", 8711}, // nabla = backward difference, U+2207 ISOtech
235
{"isin", 8712}, // element of, U+2208 ISOtech
236
{"notin", 8713}, // not an element of, U+2209 ISOtech
237
{"ni", 8715}, // contains as member, U+220B ISOtech
238
{"prod", 8719}, // n-ary product = product sign, U+220F ISOamsb
239
// prod is NOT the same character as U+03A0 'greek capital letter pi' though the
240
// same glyph might be used for both
241
{"sum", 8721}, // n-ary summation, U+2211 ISOamsb
242
// sum is NOT the same character as U+03A3 'greek capital letter sigma' though the
243
// same glyph might be used for both
244
{"minus", 8722}, // minus sign, U+2212 ISOtech
245
{"lowast", 8727}, // asterisk operator, U+2217 ISOtech
246
{"radic", 8730}, // square root = radical sign, U+221A ISOtech
247
{"prop", 8733}, // proportional to, U+221D ISOtech
248
{"infin", 8734}, // infinity, U+221E ISOtech
249
{"ang", 8736}, // angle, U+2220 ISOamso
250
{"and", 8743}, // logical and = wedge, U+2227 ISOtech
251
{"or", 8744}, // logical or = vee, U+2228 ISOtech
252
{"cap", 8745}, // intersection = cap, U+2229 ISOtech
253
{"cup", 8746}, // union = cup, U+222A ISOtech
254
{"int", 8747}, // integral, U+222B ISOtech
255
{"there4", 8756}, // therefore, U+2234 ISOtech
256
{"sim", 8764}, // tilde operator = varies with = similar to, U+223C ISOtech
257
// tilde operator is NOT the same character as the tilde, U+007E, although the same
258
// glyph might be used to represent both
259
{"cong", 8773}, // approximately equal to, U+2245 ISOtech
260
{"asymp", 8776}, // almost equal to = asymptotic to, U+2248 ISOamsr
261
{"ne", 8800}, // not equal to, U+2260 ISOtech
262
{"equiv", 8801}, // identical to, U+2261 ISOtech
263
{"le", 8804}, // less-than or equal to, U+2264 ISOtech
264
{"ge", 8805}, // greater-than or equal to, U+2265 ISOtech
265
{"sub", 8834}, // subset of, U+2282 ISOtech
266
{"sup", 8835}, // superset of, U+2283 ISOtech
267
// note that nsup, 'not a superset of, U+2285' is not covered by the Symbol font
268
// encoding and is not included.
269
{"nsub", 8836}, // not a subset of, U+2284 ISOamsn
270
{"sube", 8838}, // subset of or equal to, U+2286 ISOtech
271
{"supe", 8839}, // superset of or equal to, U+2287 ISOtech
272
{"oplus", 8853}, // circled plus = direct sum, U+2295 ISOamsb
273
{"otimes", 8855}, // circled times = vector product, U+2297 ISOamsb
274
{"perp", 8869}, // up tack = orthogonal to = perpendicular, U+22A5 ISOtech
275
{"sdot", 8901}, // dot operator, U+22C5 ISOamsb
276
// dot operator is NOT the same character as U+00B7 middle dot
277
// Miscellaneous Technical
278
{"lceil", 8968}, // left ceiling = apl upstile, U+2308 ISOamsc
279
{"rceil", 8969}, // right ceiling, U+2309 ISOamsc
280
{"lfloor", 8970}, // left floor = apl downstile, U+230A ISOamsc
281
{"rfloor", 8971}, // right floor, U+230B ISOamsc
282
{"lang", 9001}, // left-pointing angle bracket = bra, U+2329 ISOtech
283
// lang is NOT the same character as U+003C 'less than' or U+2039 'single
284
// left-pointing angle quotation mark'
285
{"rang", 9002}, // right-pointing angle bracket = ket, U+232A ISOtech
286
// rang is NOT the same character as U+003E 'greater than' or U+203A 'single
287
// right-pointing angle quotation mark'
289
{"loz", 9674}, // lozenge, U+25CA ISOpub
290
// Miscellaneous Symbols
291
{"spades", 9824}, // black spade suit, U+2660 ISOpub
292
// black here seems to mean filled as opposed to hollow
293
{"clubs", 9827}, // black club suit = shamrock, U+2663 ISOpub
294
{"hearts", 9829}, // black heart suit = valentine, U+2665 ISOpub
295
{"diams", 9830}, // black diamond suit, U+2666 ISOpub
297
// markup-significant and internationalization characters
298
// C0 Controls and Basic Latin
299
{"quot", 34}, // quotation mark = APL quote, U+0022 ISOnum
300
{"amp", 38}, // ampersand, U+0026 ISOnum
301
{"lt", 60}, // less-than sign, U+003C ISOnum
302
{"gt", 62}, // greater-than sign, U+003E ISOnum
304
{"OElig", 338}, // latin capital ligature OE, U+0152 ISOlat2
305
{"oelig", 339}, // latin small ligature oe, U+0153 ISOlat2
306
// ligature is a misnomer, this is a separate character in some languages
307
{"Scaron", 352}, // latin capital letter S with caron, U+0160 ISOlat2
308
{"scaron", 353}, // latin small letter s with caron, U+0161 ISOlat2
309
{"Yuml", 376}, // latin capital letter Y with diaeresis, U+0178 ISOlat2
310
// Spacing Modifier Letters
311
{"circ", 710}, // modifier letter circumflex accent, U+02C6 ISOpub
312
{"tilde", 732}, // small tilde, U+02DC ISOdia
313
// General Punctuation
314
{"ensp", 8194}, // en space, U+2002 ISOpub
315
{"emsp", 8195}, // em space, U+2003 ISOpub
316
{"thinsp", 8201}, // thin space, U+2009 ISOpub
317
{"zwnj", 8204}, // zero width non-joiner, U+200C NEW RFC 2070
318
{"zwj", 8205}, // zero width joiner, U+200D NEW RFC 2070
319
{"lrm", 8206}, // left-to-right mark, U+200E NEW RFC 2070
320
{"rlm", 8207}, // right-to-left mark, U+200F NEW RFC 2070
321
{"ndash", 8211}, // en dash, U+2013 ISOpub
322
{"mdash", 8212}, // em dash, U+2014 ISOpub
323
{"lsquo", 8216}, // left single quotation mark, U+2018 ISOnum
324
{"rsquo", 8217}, // right single quotation mark, U+2019 ISOnum
325
{"sbquo", 8218}, // single low-9 quotation mark, U+201A NEW
326
{"ldquo", 8220}, // left double quotation mark, U+201C ISOnum
327
{"rdquo", 8221}, // right double quotation mark, U+201D ISOnum
328
{"bdquo", 8222}, // double low-9 quotation mark, U+201E NEW
329
{"dagger", 8224}, // dagger, U+2020 ISOpub
330
{"Dagger", 8225}, // double dagger, U+2021 ISOpub
331
{"permil", 8240}, // per mille sign, U+2030 ISOtech
332
{"lsaquo", 8249}, // single left-pointing angle quotation mark, U+2039 ISO proposed
333
// lsaquo is proposed but not yet ISO standardized
334
{"rsaquo", 8250}, // single right-pointing angle quotation mark, U+203A ISO proposed
335
// rsaquo is proposed but not yet ISO standardized
336
{"euro", 8364}, // euro sign, U+20AC NEW
340
#endif /* _HTML_ENTITIES_H_ */