2
Do not include this file in your project. The fparser.cc file #includes
3
this file internally and thus you don't need to do anything (other than keep
4
this file in the same directory as fparser.cc).
6
Part of this file is generated code (by using the make_function_name_parser
7
utility, found in the development version of this library). It's not intended
8
to be modified by hand.
11
unsigned nameLength = 0;
12
const unsigned maximumNameLength = 0x80000000U-8;
14
Due to the manner in which identifier lengths are returned from
15
the readOpcode() function, the maximum supported length for
16
identifiers is 0x7FFFFFFF bytes. We subtract 8 here to add some
17
buffer, because UTF-8 characters can span multiple bytes.
18
Function names are limited to 0xFFFF bytes instead, but because
19
no function names that long are defined, the point is moot.
21
const unsigned char* const uptr = (const unsigned char*) input;
22
typedef signed char schar;
23
while(likely(nameLength < maximumNameLength))
25
unsigned char byte = uptr[nameLength+0];
26
/* Handle the common case of A-Za-z first */
29
if(byte < 0x80) // 0x40..0x7F - most common case
31
// Valid characters in 40..7F: A-Za-z_
32
// Valid bitmask for 40..5F: 01111111111111111111111111100001
33
// Valid bitmask for 60..7F: 01111111111111111111111111100000
34
if(sizeof(unsigned long) == 8)
36
const unsigned n = sizeof(unsigned long)*8-32;
37
// ^ avoids compiler warning when not 64-bit
38
unsigned long masklow6bits = 1UL << (byte & 0x3F);
39
if(masklow6bits & ~((1UL << 0) | (0x0FUL << (0x1B ))
40
| (1UL << n) | (0x1FUL << (0x1B+n))))
41
{ ++nameLength; continue; }
45
unsigned masklow5bits = 1 << (byte & 0x1F);
46
if((masklow5bits & ~(1 | (0x1F << 0x1B))) || byte == '_')
47
{ ++nameLength; continue; }
55
if(byte < 0xC2) break; // 0x80..0xC1
56
if(byte == 0xC2 && uptr[nameLength+1]==0xA0) break; // skip nbsp
57
// C2-DF - next common case when >= 0x40
58
// Valid sequence: C2-DF 80-BF
59
if(schar(uptr[nameLength+1]) > schar(0xBF)) break;
63
if(byte == 0xE0) // E0
65
// Valid sequence: E0 A0-BF 80-BF
66
if((unsigned char)(uptr[nameLength+1] - 0xA0) > (0xBF-0xA0)) break;
70
if(byte == 0xED) break; // ED is invalid
71
// Valid sequence: E1-EC 80-BF 80-BF
72
// And: EE-EF 80-BF 80-BF
75
// break on various space characters
76
if(uptr[nameLength+1] == 0x80
77
&& (schar(uptr[nameLength+2]) <= schar(0x8B)
78
|| (uptr[nameLength+2] == 0xAF))) break;
79
if(uptr[nameLength+1] == 0x81
80
&& uptr[nameLength+2] == 0x9F) break;
82
if(byte == 0xE3 && uptr[nameLength+1] == 0x80
83
&& uptr[nameLength+2] == 0x80) break; // this too
85
if(schar(uptr[nameLength+1]) > schar(0xBF)) break;
87
if(schar(uptr[nameLength+2]) > schar(0xBF)) break;
91
if(byte == 0xF0) // F0
93
// Valid sequence: F0 90-BF 80-BF 80-BF
94
if((unsigned char)(uptr[nameLength+1] - 0x90) > (0xBF-0x90)) break;
98
if(byte > 0xF4) break; // F5-FF are invalid
99
if(byte == 0xF4) // F4
101
// Valid sequence: F4 80-8F
102
if(schar(uptr[nameLength+1]) > schar(0x8F)) break;
107
// Valid sequence: F1-F3 80-BF 80-BF 80-BF
108
if(schar(uptr[nameLength+1]) > schar(0xBF)) break;
111
if(schar(uptr[nameLength+2]) > schar(0xBF)) break;
112
if(schar(uptr[nameLength+3]) > schar(0xBF)) break;
118
if(sizeof(unsigned long) == 8)
120
// Valid bitmask for 00..1F: 00000000000000000000000000000000
121
// Valid bitmask for 20..3F: 00000000000000001111111111000000
122
const unsigned n = sizeof(unsigned long)*8-32;
123
// ^ avoids compiler warning when not 64-bit
124
unsigned long masklow6bits = 1UL << byte;
125
if(masklow6bits & (((1UL << 10)-1UL) << (16+n)))
126
{ ++nameLength; continue; }
130
if(byte >= '0' && byte <= '9')
131
{ ++nameLength; continue; }
137
/* This function generated with make_function_name_parser.cc */
147
#define lE 0x80000003U:3;
148
#define lD 0x80000005U:5;
149
#define lC std::memcmp(lJ+
155
#define l6 default:lF
156
#define l5 static const char tmp[
159
#define l2 0x80000004U:4;lF 4;
160
#define l1 .enabled()?(
161
#define l0 lF Functions[
173
lJ[0]){lB'a':if('b'l4
216
lJ[1]){lB'a':if('x'l4
234
lJ[1]){lB'e':if('c'l4
255
lJ[1]){lB'c':if('o'l4
277
lJ[1]){lB'b':if('r'l4
299
lJ[1]){lB'v':if('a'l4
314
lB'l':{lI'o','g','2'}
337
lB't':{lI'a','n','h'}
349
lJ[1]){lB'c':{lI'o','s','h'}
355
lN's':{lI'i','n','h'}
b'\\ No newline at end of file'