790
// pass 2: try to fill in the missing chars, looking for names of
791
// the form 'Axx', 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B'
792
// are any letters, 'xx' is two hex digits, and 'nn' is 2-4
794
if (missing && globalParams->getMapNumericCharNames()) {
795
for (code = 0; code < 256; ++code) {
796
if ((charName = enc[code]) && !toUnicode[code] &&
797
strcmp(charName, ".notdef")) {
798
n = strlen(charName);
800
if (hex && n == 3 && isalpha(charName[0]) &&
801
isxdigit(charName[1]) && isxdigit(charName[2])) {
802
sscanf(charName+1, "%x", &code2);
803
} else if (hex && n == 2 &&
804
isxdigit(charName[0]) && isxdigit(charName[1])) {
805
sscanf(charName, "%x", &code2);
806
} else if (!hex && n >= 2 && n <= 4 &&
807
isdigit(charName[0]) && isdigit(charName[1])) {
808
code2 = atoi(charName);
809
} else if (n >= 3 && n <= 5 &&
810
isdigit(charName[1]) && isdigit(charName[2])) {
811
code2 = atoi(charName+1);
812
} else if (n >= 4 && n <= 6 &&
813
isdigit(charName[2]) && isdigit(charName[3])) {
814
code2 = atoi(charName+2);
816
if (code2 >= 0 && code2 <= 0xff) {
817
toUnicode[code] = (Unicode)code2;
822
// if the 'mapUnknownCharNames' flag is set, do a simple pass-through
823
// mapping for unknown character names
824
} else if (missing && globalParams->getMapUnknownCharNames()) {
825
for (code = 0; code < 256; ++code) {
826
if (!toUnicode[code]) {
827
toUnicode[code] = code;
832
802
// construct the char code -> Unicode mapping object
833
803
ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
805
// pass 2: try to fill in the missing chars, looking for ligatures, numeric
806
// references and variants
808
for (code = 0; code < 256; ++code) {
809
if (!toUnicode[code]) {
810
if ((charName = enc[code]) && strcmp(charName, ".notdef")
811
&& (n = parseCharName(charName, uBuf, sizeof(uBuf)/sizeof(*uBuf),
812
gFalse, // don't check simple names (pass 1)
813
gTrue, // do check ligatures
814
globalParams->getMapNumericCharNames(),
816
gTrue))) { // do check variants
817
ctu->setMapping((CharCode)code, uBuf, n);
818
} else if (globalParams->getMapUnknownCharNames()) {
819
// if the 'mapUnknownCharNames' flag is set, do a simple pass-through
820
// mapping for unknown character names
821
if (charName && charName[0]) {
822
for (n = 0; n < sizeof(uBuf)/sizeof(*uBuf); ++n)
823
if (!(uBuf[n] = charName[n]))
825
ctu->setMapping((CharCode)code, uBuf, n);
828
ctu->setMapping((CharCode)code, uBuf, 1);
835
835
// merge in a ToUnicode CMap, if there is one -- this overwrites
836
836
// existing entries in ctu, i.e., the ToUnicode CMap takes
837
837
// precedence, but the other encoding info is allowed to fill in any
964
// This function is in part a derived work of the Adobe Glyph Mapping
965
// Convention: http://www.adobe.com/devnet/opentype/archives/glyph.html
966
// Algorithmic comments are excerpted from that document to aid
968
static int parseCharName(char *charName, Unicode *uBuf, int uLen,
969
GBool names, GBool ligatures,
970
GBool numeric, GBool hex, GBool variants)
973
error(-1, "Zero-length output buffer (recursion overflow?) in "
974
"parseCharName, component \"%s\"", charName);
977
// Step 1: drop all the characters from the glyph name starting with the
978
// first occurrence of a period (U+002E FULL STOP), if any.
980
char *var_part = strchr(charName, '.');
981
if (var_part == charName) {
982
return 0; // .notdef or similar
983
} else if (var_part != NULL) {
984
// parse names of the form 7.oldstyle, P.swash, s.sc, etc.
985
char *main_part = gstrndup(charName, var_part - charName);
986
GBool namesRecurse = gTrue, variantsRecurse = gFalse;
987
int n = parseCharName(main_part, uBuf, uLen, namesRecurse, ligatures,
988
numeric, hex, variantsRecurse);
993
// Step 2: split the remaining string into a sequence of components, using
994
// underscore (U+005F LOW LINE) as the delimiter.
995
if (ligatures && strchr(charName, '_')) {
996
// parse names of the form A_a (e.g. f_i, T_h, l_quotesingle)
997
char *lig_part, *lig_end, *lig_copy;
999
lig_part = lig_copy = copyString(charName);
1001
if ((lig_end = strchr(lig_part, '_')))
1003
if (lig_part[0] != '\0') {
1004
GBool namesRecurse = gTrue, ligaturesRecurse = gFalse;
1005
if ((m = parseCharName(lig_part, uBuf + n, uLen - n, namesRecurse,
1006
ligaturesRecurse, numeric, hex, variants)))
1009
error(-1, "Could not parse ligature component \"%s\" of \"%s\" in "
1010
"parseCharName", lig_part, charName);
1012
lig_part = lig_end + 1;
1013
} while (lig_end && n < uLen);
1017
// Step 3: map each component to a character string according to the
1018
// procedure below, and concatenate those strings; the result is the
1019
// character string to which the glyph name is mapped.
1020
// 3.1. if the font is Zapf Dingbats (PostScript FontName ZapfDingbats), and
1021
// the component is in the ZapfDingbats list, then map it to the
1022
// corresponding character in that list.
1023
// 3.2. otherwise, if the component is in the Adobe Glyph List, then map it
1024
// to the corresponding character in that list.
1025
if (names && (uBuf[0] = globalParams->mapNameToUnicode(charName))) {
1029
unsigned int n = strlen(charName);
1030
// 3.3. otherwise, if the component is of the form "uni" (U+0075 U+006E
1031
// U+0069) followed by a sequence of uppercase hexadecimal digits (0 .. 9,
1032
// A .. F, i.e. U+0030 .. U+0039, U+0041 .. U+0046), the length of that
1033
// sequence is a multiple of four, and each group of four digits represents
1034
// a number in the set {0x0000 .. 0xD7FF, 0xE000 .. 0xFFFF}, then interpret
1035
// each such number as a Unicode scalar value and map the component to the
1036
// string made of those scalar values. Note that the range and digit length
1037
// restrictions mean that the "uni" prefix can be used only with Unicode
1038
// values from the Basic Multilingual Plane (BMP).
1039
if (n >= 7 && (n % 4) == 3 && !strncmp(charName, "uni", 3)) {
1041
for (i = 0, m = 3; i < uLen && m < n; m += 4) {
1042
if (isxdigit(charName[m]) && isxdigit(charName[m + 1]) &&
1043
isxdigit(charName[m + 2]) && isxdigit(charName[m + 3])) {
1045
sscanf(charName + m, "%4x", &u);
1046
if (u <= 0xD7FF || (0xE000 <= u && u <= 0xFFFF)) {
1053
// 3.4. otherwise, if the component is of the form "u" (U+0075) followed by
1054
// a sequence of four to six uppercase hexadecimal digits {0 .. 9, A .. F}
1055
// (U+0030 .. U+0039, U+0041 .. U+0046), and those digits represent a
1056
// number in {0x0000 .. 0xD7FF, 0xE000 .. 0x10FFFF}, then interpret this
1057
// number as a Unicode scalar value and map the component to the string
1058
// made of this scalar value.
1059
if (n >= 5 && n <= 7 && charName[0] == 'u' && isxdigit(charName[1]) &&
1060
isxdigit(charName[2]) && isxdigit(charName[3]) && isxdigit(charName[4])
1061
&& (n <= 5 || isxdigit(charName[5]))
1062
&& (n <= 6 || isxdigit(charName[6]))) {
1064
sscanf(charName + 1, "%x", &u);
1065
if (u <= 0xD7FF || (0xE000 <= u && u <= 0x10FFFF)) {
1070
// Not in Adobe Glyph Mapping convention: look for names of the form 'Axx',
1071
// 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B' are any letters, 'xx' is
1072
// two hex digits, and 'nn' is 2-4 decimal digits
1073
if (hex && n == 3 && isalpha(charName[0]) &&
1074
isxdigit(charName[1]) && isxdigit(charName[2])) {
1075
sscanf(charName+1, "%x", (unsigned int *)uBuf);
1077
} else if (hex && n == 2 &&
1078
isxdigit(charName[0]) && isxdigit(charName[1])) {
1079
sscanf(charName, "%x", (unsigned int *)uBuf);
1081
} else if (!hex && n >= 2 && n <= 4 &&
1082
isdigit(charName[0]) && isdigit(charName[1])) {
1083
uBuf[0] = (Unicode)atoi(charName);
1085
} else if (n >= 3 && n <= 5 &&
1086
isdigit(charName[1]) && isdigit(charName[2])) {
1087
uBuf[0] = (Unicode)atoi(charName+1);
1089
} else if (n >= 4 && n <= 6 &&
1090
isdigit(charName[2]) && isdigit(charName[3])) {
1091
uBuf[0] = (Unicode)atoi(charName+2);
1095
// 3.5. otherwise, map the component to the empty string
964
1099
int Gfx8BitFont::getNextChar(char *s, int len, CharCode *code,
965
1100
Unicode *u, int uSize, int *uLen,
966
1101
double *dx, double *dy, double *ox, double *oy) {
1752
if (vumap != 0) unicode = vumap[code];
1754
gid = mapCodeToGID(ff,cmap,unicode,gTrue);
1755
if (gid == 0 && humap != 0) {
1756
if (humap != 0) unicode = humap[code];
1757
if (unicode != 0) gid = mapCodeToGID(ff,cmap,unicode,gTrue);
1761
if (humap != 0) unicode = humap[code];
1762
if (unicode != 0) gid = mapCodeToGID(ff,cmap,unicode,wmode);
1925
for (i = 0;i < N_UCS_CANDIDATES
1926
&& gid == 0 && (unicode = humap[code*N_UCS_CANDIDATES+i]) != 0;i++) {
1927
gid = mapCodeToGID(ff,cmap,unicode,gFalse);
1930
if (gid == 0 && vumap != 0) {
1931
unicode = vumap[code];
1933
gid = mapCodeToGID(ff,cmap,unicode,gTrue);
1934
if (gid == 0 && tumap != 0) {
1935
if ((unicode = tumap[code]) != 0) {
1936
gid = mapCodeToGID(ff,cmap,unicode,gTrue);
1941
if (gid == 0 && tumap != 0) {
1942
if ((unicode = tumap[code]) != 0) {
1943
gid = mapCodeToGID(ff,cmap,unicode,gFalse);
1764
1946
if (gid == 0) {
1765
1947
/* special handling for space characters */