1
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
3
* ***** BEGIN LICENSE BLOCK *****
4
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
6
* The contents of this file are subject to the Mozilla Public License Version
7
* 1.1 (the "License"); you may not use this file except in compliance with
8
* the License. You may obtain a copy of the License at
9
* http://www.mozilla.org/MPL/
11
* Software distributed under the License is distributed on an "AS IS" basis,
12
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13
* for the specific language governing rights and limitations under the
16
* The Original Code is Mozilla Communicator client code, released
19
* The Initial Developer of the Original Code is
20
* Netscape Communications Corporation.
21
* Portions created by the Initial Developer are Copyright (C) 1998
22
* the Initial Developer. All Rights Reserved.
26
* Alternatively, the contents of this file may be used under the terms of
27
* either of the GNU General Public License Version 2 or later (the "GPL"),
28
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29
* in which case the provisions of the GPL or the LGPL are applicable instead
30
* of those above. If you wish to allow use of your version of this file only
31
* under the terms of either the GPL or the LGPL, and not to allow others to
32
* use your version of this file under the terms of the MPL, indicate your
33
* decision by deleting the provisions above and replace them with the notice
34
* and other provisions required by the GPL or the LGPL. If you do not delete
35
* the provisions above, a recipient may use your version of this file under
36
* the terms of any one of the MPL, the GPL or the LGPL.
38
* ***** END LICENSE BLOCK ***** */
43
* JS string type implementation.
45
* A JS string is a counted array of unicode characters. To support handoff
46
* of API client memory, the chars are allocated separately from the length,
47
* necessitating a pointer after the count, to form a separately allocated
48
* string descriptor. String descriptors are GC'ed, while their chars are
49
* allocated from the malloc heap.
51
* When a string is treated as an object (by following it with . or []), the
52
* runtime wraps it with a JSObject whose valueOf method returns the unwrapped
63
/*
 * The original GC-thing "string" type, a flat character string owned by its
 * GC-thing descriptor.  The chars member points to a vector having byte size
 * (length + 1) * sizeof(jschar), terminated at index length by a zero jschar.
 * The terminator is purely a backstop, in case the chars pointer flows out to
 * native code that requires \u0000 termination.
 *
 * NB: Always use the JSSTRING_LENGTH and JSSTRING_CHARS accessor macros,
 * unless you guard str->member uses with !JSSTRING_IS_DEPENDENT(str).
 */
78
/*
 * Overlay structure for a string that depends on another string's characters.
 * Distinguished by the JSSTRFLAG_DEPENDENT bit being set in length.  The base
 * member may point to another dependent string if JSSTRING_CHARS has not been
 * called yet.  The length chars in a dependent string are stored starting at
 * base->chars + start, and are not necessarily zero-terminated.  If start is
 * 0, it is not stored, length is a full size_t (minus the JSSTRFLAG_* bits in
 * the high two positions), and the JSSTRFLAG_PREFIX flag is set.
 */
86
struct JSDependentString {
91
/*
 * Definitions for flags stored in the high order bits of JSString.length.
 *
 * JSSTRFLAG_BITS bits are reserved for flags.  JSSTRFLAG_SHIFT moves a flag
 * value left by JSSTRING_LENGTH_BITS so that it sits above the length bits,
 * and JSSTRFLAG_MASK is JS_BITMASK(JSSTRFLAG_BITS) shifted to that position.
 */
#define JSSTRFLAG_BITS          2
#define JSSTRFLAG_SHIFT(flg)    ((size_t)(flg) << JSSTRING_LENGTH_BITS)
#define JSSTRFLAG_MASK          JSSTRFLAG_SHIFT(JS_BITMASK(JSSTRFLAG_BITS))
#define JSSTRFLAG_DEPENDENT     JSSTRFLAG_SHIFT(1)  /* string is dependent */
#define JSSTRFLAG_PREFIX        JSSTRFLAG_SHIFT(2)  /* dependent, start == 0 */
98
/*
 * Universal JSString type inquiry and accessor macros.
 *
 * JSSTRING_BIT(n) is a size_t with only bit n set; JSSTRING_BITMASK(n) is a
 * size_t with the low n bits set.
 *
 * JSSTRING_HAS_FLAG tests a JSSTRFLAG_* bit in str->length and yields exactly
 * 0 or 1.  The flags live in the high order bits of a size_t, so returning
 * the raw masked value would truncate to zero whenever a caller stored the
 * result in a narrower integer type.
 */
#define JSSTRING_BIT(n)             ((size_t)1 << (n))
#define JSSTRING_BITMASK(n)         (JSSTRING_BIT(n) - 1)
#define JSSTRING_HAS_FLAG(str,flg)  (((str)->length & (flg)) != 0)
#define JSSTRING_IS_DEPENDENT(str)  JSSTRING_HAS_FLAG(str, JSSTRFLAG_DEPENDENT)
#define JSSTRING_IS_PREFIX(str)     JSSTRING_HAS_FLAG(str, JSSTRFLAG_PREFIX)
104
#define JSSTRING_CHARS(str) (JSSTRING_IS_DEPENDENT(str) \
105
? JSSTRDEP_CHARS(str) \
107
#define JSSTRING_LENGTH(str) (JSSTRING_IS_DEPENDENT(str) \
108
? JSSTRDEP_LENGTH(str) \
110
#define JSSTRING_LENGTH_BITS (sizeof(size_t) * JS_BITS_PER_BYTE \
112
#define JSSTRING_LENGTH_MASK JSSTRING_BITMASK(JSSTRING_LENGTH_BITS)
114
/*
 * Specific JSDependentString shift/mask accessor and mutator macros.
 *
 * For a dependent string that is not a prefix, the length bits of
 * JSString.length are split in two: the low JSSTRDEP_LENGTH_BITS hold the
 * length, and the JSSTRDEP_START_BITS above them hold the start offset.
 * When JSSTRING_LENGTH_BITS is odd the start field gets the extra bit.
 */
#define JSSTRDEP_LENGTH_BITS    (JSSTRING_LENGTH_BITS / 2)
#define JSSTRDEP_LENGTH_MASK    JSSTRING_BITMASK(JSSTRDEP_LENGTH_BITS)
#define JSSTRDEP_START_BITS     (JSSTRING_LENGTH_BITS - JSSTRDEP_LENGTH_BITS)
#define JSSTRDEP_START_SHIFT    JSSTRDEP_LENGTH_BITS
#define JSSTRDEP_START_MASK     JSSTRING_BITMASK(JSSTRDEP_START_BITS)
121
/* View a string pointer as the JSDependentString overlay. */
#define JSSTRDEP(str)           ((JSDependentString *)(str))

/*
 * Start offset of a dependent string within its base.  A prefix string does
 * not store a start (it is always 0); otherwise the start is held above the
 * length field in the length word.
 */
#define JSSTRDEP_START(str)     (JSSTRING_IS_PREFIX(str) ? 0                  \
                                 : ((JSSTRDEP(str)->length                    \
                                     >> JSSTRDEP_START_SHIFT)                 \
                                    & JSSTRDEP_START_MASK))

/*
 * Length of a dependent string with the flag bits (and, for a non-prefix
 * string, the start field) masked away.  A prefix string uses all of the
 * length bits; any other dependent string uses only the low
 * JSSTRDEP_LENGTH_BITS.
 */
#define JSSTRDEP_LENGTH(str)    (JSSTRDEP(str)->length                        \
                                 & (JSSTRING_IS_PREFIX(str)                   \
                                    ? JSSTRING_LENGTH_MASK                    \
                                    : JSSTRDEP_LENGTH_MASK))
131
#define JSSTRDEP_SET_START_AND_LENGTH(str,off,len) \
132
(JSSTRDEP(str)->length = JSSTRFLAG_DEPENDENT \
133
| ((off) << JSSTRDEP_START_SHIFT) \
135
/*
 * Mark str as a dependent prefix string (start offset 0) of the given length
 * by overwriting its length word.  len is OR'ed in unmasked, so it must fit
 * in the length bits below the flags.
 */
#define JSPREFIX_SET_LENGTH(str,len)                                          \
    (JSSTRDEP(str)->length = JSSTRFLAG_DEPENDENT | JSSTRFLAG_PREFIX | (len))

/*
 * Base-string accessors for the JSDependentString overlay.  The JSPREFIX_*
 * forms are plain aliases of the JSSTRDEP_* ones.
 */
#define JSSTRDEP_BASE(str)          (JSSTRDEP(str)->base)
#define JSSTRDEP_SET_BASE(str,bstr) (JSSTRDEP(str)->base = (bstr))
#define JSPREFIX_BASE(str)          JSSTRDEP_BASE(str)
#define JSPREFIX_SET_BASE(str,bstr) JSSTRDEP_SET_BASE(str,bstr)
143
/*
 * Characters of a dependent string.  When the base is itself dependent the
 * lookup is delegated to js_GetDependentStringChars(str); otherwise the chars
 * begin JSSTRDEP_START(str) elements into the base string's chars vector.
 * Note that str is evaluated more than once.
 */
#define JSSTRDEP_CHARS(str)                                                   \
    (JSSTRING_IS_DEPENDENT(JSSTRDEP_BASE(str))                                \
     ? js_GetDependentStringChars(str)                                        \
     : JSSTRDEP_BASE(str)->chars + JSSTRDEP_START(str))
149
js_MinimizeDependentStrings(JSString *str, int level, JSString **basep);
152
js_GetDependentStringChars(JSString *str);
155
js_GetStringChars(JSString *str);
158
js_ConcatStrings(JSContext *cx, JSString *left, JSString *right);
160
extern const jschar *
161
js_UndependString(JSContext *cx, JSString *str);
168
extern jschar js_empty_ucstr[];
169
extern JSSubString js_EmptySubString;
171
/* Unicode character attribute lookup tables. */
172
extern const uint8 js_X[];
173
extern const uint8 js_Y[];
174
extern const uint32 js_A[];
176
/* Enumerated Unicode general category types. */
177
typedef enum JSCharType {
179
JSCT_UPPERCASE_LETTER = 1,
180
JSCT_LOWERCASE_LETTER = 2,
181
JSCT_TITLECASE_LETTER = 3,
182
JSCT_MODIFIER_LETTER = 4,
183
JSCT_OTHER_LETTER = 5,
184
JSCT_NON_SPACING_MARK = 6,
185
JSCT_ENCLOSING_MARK = 7,
186
JSCT_COMBINING_SPACING_MARK = 8,
187
JSCT_DECIMAL_DIGIT_NUMBER = 9,
188
JSCT_LETTER_NUMBER = 10,
189
JSCT_OTHER_NUMBER = 11,
190
JSCT_SPACE_SEPARATOR = 12,
191
JSCT_LINE_SEPARATOR = 13,
192
JSCT_PARAGRAPH_SEPARATOR = 14,
195
JSCT_PRIVATE_USE = 18,
197
JSCT_DASH_PUNCTUATION = 20,
198
JSCT_START_PUNCTUATION = 21,
199
JSCT_END_PUNCTUATION = 22,
200
JSCT_CONNECTOR_PUNCTUATION = 23,
201
JSCT_OTHER_PUNCTUATION = 24,
202
JSCT_MATH_SYMBOL = 25,
203
JSCT_CURRENCY_SYMBOL = 26,
204
JSCT_MODIFIER_SYMBOL = 27,
205
JSCT_OTHER_SYMBOL = 28
208
/*
 * Character classifying and mapping macros, based on java.lang.Character.
 *
 * JS_CCODE(c) fetches the attribute word for c: the bits of (uint16)c above
 * the low six index js_X, that entry shifted left by six and OR'ed with the
 * low six bits of c indexes js_Y, and the result indexes js_A.
 * JS_CTYPE(c) is the low five bits of that word, compared elsewhere against
 * JSCharType values.  Both macros evaluate c more than once.
 */
#define JS_CCODE(c)     (js_A[js_Y[(js_X[(uint16)(c) >> 6] << 6) | ((c) & 0x3F)]])
#define JS_CTYPE(c)     (JS_CCODE(c) & 0x1F)
212
#define JS_ISALPHA(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
213
(1 << JSCT_LOWERCASE_LETTER) | \
214
(1 << JSCT_TITLECASE_LETTER) | \
215
(1 << JSCT_MODIFIER_LETTER) | \
216
(1 << JSCT_OTHER_LETTER)) \
219
#define JS_ISALNUM(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
220
(1 << JSCT_LOWERCASE_LETTER) | \
221
(1 << JSCT_TITLECASE_LETTER) | \
222
(1 << JSCT_MODIFIER_LETTER) | \
223
(1 << JSCT_OTHER_LETTER) | \
224
(1 << JSCT_DECIMAL_DIGIT_NUMBER)) \
227
/* A unicode letter, suitable for use in an identifier. */
228
#define JS_ISUC_LETTER(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
229
(1 << JSCT_LOWERCASE_LETTER) | \
230
(1 << JSCT_TITLECASE_LETTER) | \
231
(1 << JSCT_MODIFIER_LETTER) | \
232
(1 << JSCT_OTHER_LETTER) | \
233
(1 << JSCT_LETTER_NUMBER)) \
237
* 'IdentifierPart' from ECMA grammar, is Unicode letter or
238
* combining mark or digit or connector punctuation.
240
#define JS_ISID_PART(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
241
(1 << JSCT_LOWERCASE_LETTER) | \
242
(1 << JSCT_TITLECASE_LETTER) | \
243
(1 << JSCT_MODIFIER_LETTER) | \
244
(1 << JSCT_OTHER_LETTER) | \
245
(1 << JSCT_LETTER_NUMBER) | \
246
(1 << JSCT_NON_SPACING_MARK) | \
247
(1 << JSCT_COMBINING_SPACING_MARK) | \
248
(1 << JSCT_DECIMAL_DIGIT_NUMBER) | \
249
(1 << JSCT_CONNECTOR_PUNCTUATION)) \
252
/* Unicode control-format characters, ignored in input */
#define JS_ISFORMAT(c)  (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1)

/* A word character: alphanumeric per JS_ISALNUM, or an underscore. */
#define JS_ISWORD(c)    (JS_ISALNUM(c) || (c) == '_')

/*
 * Identifier start / identifier part tests: the Unicode classes checked by
 * JS_ISUC_LETTER and JS_ISID_PART respectively, plus '_' and '$'.
 */
/* XXXbe unify on A/X/Y tbls, avoid ctype.h? */
#define JS_ISIDENT_START(c) (JS_ISUC_LETTER(c) || (c) == '_' || (c) == '$')
#define JS_ISIDENT(c)   (JS_ISID_PART(c) || (c) == '_' || (c) == '$')

#define JS_ISDIGIT(c)   (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER)

/* JS_ISSPACE tests a three-bit field of the attribute word for the value 4. */
/* XXXbe fs, etc. ? */
#define JS_ISSPACE(c)   ((JS_CCODE(c) & 0x00070000) == 0x00040000)
/* Only values below 128 are ever handed to isprint(). */
#define JS_ISPRINT(c)   ((c) < 128 && isprint(c))

#define JS_ISUPPER(c)   (JS_CTYPE(c) == JSCT_UPPERCASE_LETTER)
#define JS_ISLOWER(c)   (JS_CTYPE(c) == JSCT_LOWERCASE_LETTER)
270
#define JS_TOUPPER(c) ((jschar) ((JS_CCODE(c) & 0x00100000) \
271
? (c) - ((int32)JS_CCODE(c) >> 22) \
273
#define JS_TOLOWER(c) ((jschar) ((JS_CCODE(c) & 0x00200000) \
274
? (c) + ((int32)JS_CCODE(c) >> 22) \
277
/*
 * Map c to its control-character counterpart by toggling bit 6 (value 64),
 * e.g. 'A' (65) becomes 1.
 */
#define JS_TOCTRL(c) ((c) ^ 64) /* XXX unsafe! requires uppercase c */
279
/*
 * Shorthands for ASCII (7-bit) decimal and hex conversion.
 *
 * The JS7_IS* predicates reject any value >= 128 before consulting <ctype.h>.
 * JS7_UNDEC and JS7_UNHEX do no checking of their own: use them only on a c
 * for which JS7_ISDEC / JS7_ISHEX holds.  JS7_UNHEX is fully parenthesized so
 * that it behaves as a single operand in any surrounding expression.
 */
#define JS7_ISDEC(c)    ((c) < 128 && isdigit(c))
#define JS7_UNDEC(c)    ((c) - '0')
#define JS7_ISHEX(c)    ((c) < 128 && isxdigit(c))
#define JS7_UNHEX(c)    ((uintN)(isdigit(c) ? (c) - '0' : 10 + tolower(c) - 'a'))
#define JS7_ISLET(c)    ((c) < 128 && isalpha(c))
286
/* Initialize truly global state associated with JS strings. */
288
js_InitStringGlobals(void);
291
js_FreeStringGlobals(void);
294
js_PurgeDeflatedStringCache(JSString *str);
296
/* Initialize per-runtime string state for the first context in the runtime. */
298
js_InitRuntimeStringState(JSContext *cx);
301
js_FinishRuntimeStringState(JSContext *cx);
303
/* Initialize the String class, returning its prototype object. */
305
js_InitStringClass(JSContext *cx, JSObject *obj);
307
extern const char js_escape_str[];
308
extern const char js_unescape_str[];
309
extern const char js_uneval_str[];
310
extern const char js_decodeURI_str[];
311
extern const char js_encodeURI_str[];
312
extern const char js_decodeURIComponent_str[];
313
extern const char js_encodeURIComponent_str[];
315
/* GC-allocate a string descriptor for the given malloc-allocated chars. */
317
js_NewString(JSContext *cx, jschar *chars, size_t length, uintN gcflag);
320
js_NewDependentString(JSContext *cx, JSString *base, size_t start,
321
size_t length, uintN gcflag);
323
/* Copy a counted string and GC-allocate a descriptor for it. */
325
js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n, uintN gcflag);
327
/* Copy a C string and GC-allocate a descriptor for it. */
329
js_NewStringCopyZ(JSContext *cx, const jschar *s, uintN gcflag);
331
/* Free the chars held by str when it is finalized by the GC. */
333
js_FinalizeString(JSContext *cx, JSString *str);
336
js_FinalizeStringRT(JSRuntime *rt, JSString *str);
338
/* Wrap a string value in a String object. */
340
js_StringToObject(JSContext *cx, JSString *str);
343
* Convert a value to a string, returning null after reporting an error,
344
* otherwise returning a new string reference.
347
js_ValueToString(JSContext *cx, jsval v);
350
* Convert a value to its source expression, returning null after reporting
351
* an error, otherwise returning a new string reference.
354
js_ValueToSource(JSContext *cx, jsval v);
356
#ifdef HT_ENUMERATE_NEXT /* XXX don't require jshash.h */
358
* Compute a hash function from str.
361
js_HashString(JSString *str);
365
* Return less than, equal to, or greater than zero depending on whether
366
* str1 is less than, equal to, or greater than str2.
369
js_CompareStrings(JSString *str1, JSString *str2);
372
* Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
373
* The patlen argument must be positive and no greater than BMH_PATLEN_MAX.
374
* The start argument tells where in text to begin the search.
376
* Return the index of pat in text, or -1 if not found.
378
/* Limits and error sentinel for js_BoyerMooreHorspool. */
#define BMH_CHARSET_SIZE 256    /* ISO-Latin-1 */
#define BMH_PATLEN_MAX   255    /* skip table element is uint8 */

#define BMH_BAD_PATTERN  (-2)   /* return value if pat is not ISO-Latin-1 */
384
js_BoyerMooreHorspool(const jschar *text, jsint textlen,
385
const jschar *pat, jsint patlen,
389
js_strlen(const jschar *s);
392
js_strchr(const jschar *s, jschar c);
395
js_strchr_limit(const jschar *s, jschar c, const jschar *limit);
397
/*
 * Copy n jschars from s to t.  This is a plain memcpy: the regions must not
 * overlap, and unlike strncpy() nothing is zero-padded or terminated.
 */
#define js_strncpy(t, s, n) memcpy((t), (s), (n) * sizeof(jschar))
400
* Return s advanced past any Unicode white space characters.
402
extern const jschar *
403
js_SkipWhiteSpace(const jschar *s);
406
* Inflate bytes to JS chars and vice versa. Report out of memory via cx
407
* and return null on error, otherwise return the jschar or byte vector that
411
js_InflateString(JSContext *cx, const char *bytes, size_t length);
414
js_DeflateString(JSContext *cx, const jschar *chars, size_t length);
417
* Inflate bytes to JS chars into a buffer.
418
* 'chars' must be large enough for 'length'+1 jschars.
421
js_InflateStringToBuffer(jschar *chars, const char *bytes, size_t length);
424
* Associate bytes with str in the deflated string cache, returning true on
425
* successful association, false on out of memory.
428
js_SetStringBytes(JSString *str, char *bytes, size_t length);
431
* Find or create a deflated string cache entry for str that contains its
432
* characters chopped from Unicode code points into bytes.
435
js_GetStringBytes(JSString *str);
438
js_str_escape(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
443
#endif /* jsstr_h___ */