1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
3
* ***** BEGIN LICENSE BLOCK *****
4
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
6
* The contents of this file are subject to the Mozilla Public License Version
7
* 1.1 (the "License"); you may not use this file except in compliance with
8
* the License. You may obtain a copy of the License at
9
* http://www.mozilla.org/MPL/
11
* Software distributed under the License is distributed on an "AS IS" basis,
12
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13
* for the specific language governing rights and limitations under the
16
* The Original Code is Mozilla Communicator client code, released
19
* The Initial Developer of the Original Code is
20
* Netscape Communications Corporation.
21
* Portions created by the Initial Developer are Copyright (C) 1998
22
* the Initial Developer. All Rights Reserved.
26
* Alternatively, the contents of this file may be used under the terms of
27
* either of the GNU General Public License Version 2 or later (the "GPL"),
28
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29
* in which case the provisions of the GPL or the LGPL are applicable instead
30
* of those above. If you wish to allow use of your version of this file only
31
* under the terms of either the GPL or the LGPL, and not to allow others to
32
* use your version of this file under the terms of the MPL, indicate your
33
* decision by deleting the provisions above and replace them with the notice
34
* and other provisions required by the GPL or the LGPL. If you do not delete
35
* the provisions above, a recipient may use your version of this file under
36
* the terms of any one of the MPL, the GPL or the LGPL.
38
* ***** END LICENSE BLOCK ***** */
46
#include "jshashtable.h"
55
* Conceptually, a JS string is just an array of chars and a length. To improve
56
* performance of common string operations, the following optimizations are
57
* made which affect the engine's representation of strings:
59
* - The plain vanilla representation is a "flat" string which consists of a
60
* string header in the GC heap and a malloc'd null terminated char array.
62
* - To avoid copying a substring of an existing "base" string, a "dependent"
63
* string (JSDependentString) can be created which points into the base
64
* string's char array.
66
* - To avoid O(n^2) char buffer copying, a "rope" node (JSRope) can be created
67
* to represent a delayed string concatenation. Concatenation (called
68
* flattening) is performed if and when a linear char array is requested. In
69
* general, ropes form a binary dag whose internal nodes are JSRope string
70
* headers with no associated char array and whose leaf nodes are either flat
71
* or dependent strings.
73
* - To avoid copying the left-hand side when flattening, the left-hand side's
74
* buffer may be grown to make space for a copy of the right-hand side (see
75
* comment in JSString::flatten). This optimization requires that there are
76
* no external pointers into the char array. We conservatively maintain this
77
* property via a flat string's "extensible" property.
79
* - To avoid allocating small char arrays, short strings can be stored inline
80
* in the string header (JSInlineString). To increase the max size of such
81
* inline strings, extra-large string headers can be used (JSShortString).
83
* - To avoid O(n) string equality comparison, strings can be
84
* canonicalized to "atoms" (JSAtom) such that there is a single atom with a
85
* given (length,chars).
87
* - To avoid dynamic creation of common short strings (e.g., single-letter
88
* alphanumeric strings, numeric strings up to 999) headers and char arrays
89
* for such strings are allocated in static memory (JSStaticAtom) and used
92
* - To avoid copying all strings created through the JSAPI, an "external"
93
* string (JSExternalString) can be created whose chars are managed by the
96
* Although all strings share the same basic memory layout, we can conceptually
97
* arrange them into a hierarchy of operations/invariants and represent this
98
* hierarchy in C++ with classes:
100
* C++ type operations+fields / invariants+properties
102
* JSString (abstract) getCharsZ, getChars, length / -
104
* | JSRope leftChild, rightChild / -
106
* JSLinearString (abstract) chars / not null-terminated
108
* | JSDependentString base / -
110
* JSFlatString (abstract) charsZ / null-terminated
112
* | JSExtensibleString capacity / no external pointers into char array
114
* JSFixedString - / may have external pointers into char array
116
* | \ JSExternalString - / char array memory managed by embedding
118
* | JSInlineString - / chars stored in header
120
* | | JSShortString - / header is fat
122
* JSAtom | | - / string equality === pointer equality
124
* | JSInlineAtom | - / atomized JSInlineString
126
* | JSShortAtom - / atomized JSShortString
128
* JSStaticAtom - / header and chars statically allocated
130
* Classes marked with (abstract) above are not literally C++ Abstract Base
131
* Classes (since there are no virtual functions, pure or not, in this
132
* hierarchy), but have the same meaning: there are no strings with this type as
133
* its most-derived type.
135
* Derived string types can be queried from ancestor types via isX() and
136
* retrieved with asX() debug-only-checked casts.
138
* The ensureX() operations mutate 'this' in place, effectively changing the type to be
139
* at least X (e.g., ensureLinear will change a JSRope to be a JSFlatString).
142
class JSString : public js::gc::Cell
145
static const size_t NUM_INLINE_CHARS = 2 * sizeof(void *) / sizeof(jschar);
147
/* Fields only apply to string types commented on the right. */
150
size_t lengthAndFlags; /* JSString */
152
const jschar *chars; /* JSLinearString */
153
JSString *left; /* JSRope */
156
jschar inlineStorage[NUM_INLINE_CHARS]; /* JS(Inline|Short)String */
159
JSLinearString *base; /* JSDependentString */
160
JSString *right; /* JSRope */
161
size_t capacity; /* JSFlatString (extensible) */
162
size_t externalType; /* JSExternalString */
165
JSString *parent; /* JSRope (temporary) */
166
void *externalClosure; /* JSExternalString */
167
size_t reserved; /* may use for bug 615290 */
174
/* Flags exposed only for jits */
176
static const size_t LENGTH_SHIFT = 4;
177
static const size_t FLAGS_MASK = JS_BITMASK(LENGTH_SHIFT);
178
static const size_t MAX_LENGTH = JS_BIT(32 - LENGTH_SHIFT) - 1;
181
* The low LENGTH_SHIFT bits of lengthAndFlags are used to encode the type
182
* of the string. The remaining bits store the string length (which must be
183
* less than or equal to MAX_LENGTH).
185
* Instead of using a dense index to represent the most-derived type, string
186
* types are encoded to allow single-op tests for hot queries (isRope,
187
* isDependent, isFlat, isAtom, isStaticAtom):
190
* JSLinearString xxx0
191
* JSDependentString xx1x
193
* JSExtensibleString 1100
194
* JSFixedString xy00 where xy != 11
195
* JSInlineString 0100 and chars == inlineStorage
196
* JSShortString 0100 and in FINALIZE_SHORT_STRING arena
197
* JSExternalString 0100 and in FINALIZE_EXTERNAL_STRING arena
201
* NB: this scheme takes advantage of the fact that there are no string
202
* instances whose most-derived type is JSString, JSLinearString, or
206
static const size_t ROPE_BIT = JS_BIT(0);
208
static const size_t LINEAR_MASK = JS_BITMASK(1);
209
static const size_t LINEAR_FLAGS = 0x0;
211
static const size_t DEPENDENT_BIT = JS_BIT(1);
213
static const size_t FLAT_MASK = JS_BITMASK(2);
214
static const size_t FLAT_FLAGS = 0x0;
216
static const size_t FIXED_FLAGS = JS_BIT(2);
218
static const size_t ATOM_MASK = JS_BITMASK(3);
219
static const size_t ATOM_FLAGS = 0x0;
221
static const size_t STATIC_ATOM_MASK = JS_BITMASK(4);
222
static const size_t STATIC_ATOM_FLAGS = 0x0;
224
static const size_t EXTENSIBLE_FLAGS = JS_BIT(2) | JS_BIT(3);
225
static const size_t NON_STATIC_ATOM = JS_BIT(3);
227
size_t buildLengthAndFlags(size_t length, size_t flags) {
228
return (length << LENGTH_SHIFT) | flags;
231
static void staticAsserts() {
232
JS_STATIC_ASSERT(size_t(JSString::MAX_LENGTH) <= size_t(JSVAL_INT_MAX));
233
JS_STATIC_ASSERT(JSString::MAX_LENGTH <= JSVAL_INT_MAX);
234
JS_STATIC_ASSERT(JS_BITS_PER_WORD >= 32);
235
JS_STATIC_ASSERT(((JSString::MAX_LENGTH << JSString::LENGTH_SHIFT) >>
236
JSString::LENGTH_SHIFT) == JSString::MAX_LENGTH);
237
JS_STATIC_ASSERT(sizeof(JSString) ==
238
offsetof(JSString, d.inlineStorage) +
239
NUM_INLINE_CHARS * sizeof(jschar));
242
/* Avoid lame compile errors in JSRope::flatten */
246
/* All strings have length. */
249
size_t length() const {
250
return d.lengthAndFlags >> LENGTH_SHIFT;
255
return d.lengthAndFlags <= FLAGS_MASK;
259
* All strings have a fallible operation to get an array of chars.
260
* getCharsZ additionally ensures the array is null terminated.
263
inline const jschar *getChars(JSContext *cx);
264
inline const jschar *getCharsZ(JSContext *cx);
266
/* Fallible conversions to more-derived string types. */
268
inline JSLinearString *ensureLinear(JSContext *cx);
269
inline JSFlatString *ensureFlat(JSContext *cx);
270
inline JSFixedString *ensureFixed(JSContext *cx);
272
/* Type query and debug-checked casts */
275
bool isRope() const {
276
bool rope = d.lengthAndFlags & ROPE_BIT;
277
JS_ASSERT_IF(rope, (d.lengthAndFlags & FLAGS_MASK) == ROPE_BIT);
284
return *(JSRope *)this;
288
bool isLinear() const {
289
return (d.lengthAndFlags & LINEAR_MASK) == LINEAR_FLAGS;
293
JSLinearString &asLinear() {
294
JS_ASSERT(isLinear());
295
return *(JSLinearString *)this;
299
bool isDependent() const {
300
bool dependent = d.lengthAndFlags & DEPENDENT_BIT;
301
JS_ASSERT_IF(dependent, (d.lengthAndFlags & FLAGS_MASK) == DEPENDENT_BIT);
306
JSDependentString &asDependent() {
307
JS_ASSERT(isDependent());
308
return *(JSDependentString *)this;
312
bool isFlat() const {
313
return (d.lengthAndFlags & FLAT_MASK) == FLAT_FLAGS;
317
JSFlatString &asFlat() {
319
return *(JSFlatString *)this;
323
bool isExtensible() const {
324
return (d.lengthAndFlags & FLAGS_MASK) == EXTENSIBLE_FLAGS;
328
JSExtensibleString &asExtensible() const {
329
JS_ASSERT(isExtensible());
330
return *(JSExtensibleString *)this;
334
bool isShort() const;
335
bool isFixed() const;
339
JSFixedString &asFixed() {
340
JS_ASSERT(isFixed());
341
return *(JSFixedString *)this;
344
bool isExternal() const;
347
JSExternalString &asExternal() {
348
JS_ASSERT(isExternal());
349
return *(JSExternalString *)this;
353
bool isAtom() const {
354
bool atomized = (d.lengthAndFlags & ATOM_MASK) == ATOM_FLAGS;
355
JS_ASSERT_IF(atomized, isFlat());
360
JSAtom &asAtom() const {
362
return *(JSAtom *)this;
366
bool isStaticAtom() const {
367
return (d.lengthAndFlags & FLAGS_MASK) == STATIC_ATOM_FLAGS;
370
/* Only called by the GC for strings with the FINALIZE_STRING kind. */
372
inline void finalize(JSContext *cx);
374
/* Called during GC for any string. */
376
void mark(JSTracer *trc);
378
/* Offsets for direct field from jit code. */
380
static size_t offsetOfLengthAndFlags() {
381
return offsetof(JSString, d.lengthAndFlags);
384
static size_t offsetOfChars() {
385
return offsetof(JSString, d.u1.chars);
389
class JSRope : public JSString
391
friend class JSString;
392
JSFlatString *flatten(JSContext *cx);
394
void init(JSString *left, JSString *right, size_t length);
397
static inline JSRope *new_(JSContext *cx, JSString *left,
398
JSString *right, size_t length);
400
inline JSString *leftChild() const {
405
inline JSString *rightChild() const {
411
JS_STATIC_ASSERT(sizeof(JSRope) == sizeof(JSString));
413
class JSLinearString : public JSString
415
friend class JSString;
416
void mark(JSTracer *trc);
420
const jschar *chars() const {
421
JS_ASSERT(isLinear());
426
JS_STATIC_ASSERT(sizeof(JSLinearString) == sizeof(JSString));
428
class JSDependentString : public JSLinearString
430
friend class JSString;
431
JSFixedString *undepend(JSContext *cx);
433
void init(JSLinearString *base, const jschar *chars, size_t length);
436
static inline JSDependentString *new_(JSContext *cx, JSLinearString *base,
437
const jschar *chars, size_t length);
439
JSLinearString *base() const {
440
JS_ASSERT(isDependent());
445
JS_STATIC_ASSERT(sizeof(JSDependentString) == sizeof(JSString));
447
class JSFlatString : public JSLinearString
450
void morphExtensibleIntoDependent(JSLinearString *base) {
451
d.lengthAndFlags = buildLengthAndFlags(length(), DEPENDENT_BIT);
457
const jschar *charsZ() const {
462
/* Only called by the GC for strings with the FINALIZE_STRING kind. */
464
inline void finalize(JSRuntime *rt);
467
JS_STATIC_ASSERT(sizeof(JSFlatString) == sizeof(JSString));
469
class JSExtensibleString : public JSFlatString
473
size_t capacity() const {
474
JS_ASSERT(isExtensible());
475
return d.s.u2.capacity;
479
JS_STATIC_ASSERT(sizeof(JSExtensibleString) == sizeof(JSString));
481
class JSFixedString : public JSFlatString
483
void init(const jschar *chars, size_t length);
486
static inline JSFixedString *new_(JSContext *cx, const jschar *chars, size_t length);
489
* Once a JSFixedString has been added to the atom state, this operation
490
* changes the type (in place, as reflected by the flag bits) of the
491
* JSFixedString into a JSAtom.
493
inline JSAtom *morphAtomizedStringIntoAtom();
496
JS_STATIC_ASSERT(sizeof(JSFixedString) == sizeof(JSString));
498
class JSInlineString : public JSFixedString
500
static const size_t MAX_INLINE_LENGTH = NUM_INLINE_CHARS - 1;
503
static inline JSInlineString *new_(JSContext *cx);
505
inline jschar *init(size_t length);
507
inline void resetLength(size_t length);
509
static bool lengthFits(size_t length) {
510
return length <= MAX_INLINE_LENGTH;
515
JS_STATIC_ASSERT(sizeof(JSInlineString) == sizeof(JSString));
517
class JSShortString : public JSInlineString
519
/* This can be any value that is a multiple of sizeof(gc::FreeCell). */
520
static const size_t INLINE_EXTENSION_CHARS = sizeof(JSString::Data) / sizeof(jschar);
522
static void staticAsserts() {
523
JS_STATIC_ASSERT(INLINE_EXTENSION_CHARS % sizeof(js::gc::FreeCell) == 0);
524
JS_STATIC_ASSERT(MAX_SHORT_LENGTH + 1 ==
525
(sizeof(JSShortString) -
526
offsetof(JSShortString, d.inlineStorage)) / sizeof(jschar));
529
jschar inlineStorageExtension[INLINE_EXTENSION_CHARS];
532
static inline JSShortString *new_(JSContext *cx);
534
jschar *inlineStorageBeforeInit() {
535
return d.inlineStorage;
538
inline void initAtOffsetInBuffer(const jschar *chars, size_t length);
540
static const size_t MAX_SHORT_LENGTH = JSString::NUM_INLINE_CHARS +
541
INLINE_EXTENSION_CHARS
542
-1 /* null terminator */;
544
static bool lengthFits(size_t length) {
545
return length <= MAX_SHORT_LENGTH;
548
/* Only called by the GC for strings with the FINALIZE_SHORT_STRING kind. */
550
JS_ALWAYS_INLINE void finalize(JSContext *cx);
553
JS_STATIC_ASSERT(sizeof(JSShortString) == 2 * sizeof(JSString));
556
* The externalClosure stored in an external string is a black box to the JS
557
* engine; see JS_NewExternalStringWithClosure.
559
class JSExternalString : public JSFixedString
561
static void staticAsserts() {
562
JS_STATIC_ASSERT(TYPE_LIMIT == 8);
565
void init(const jschar *chars, size_t length, intN type, void *closure);
568
static inline JSExternalString *new_(JSContext *cx, const jschar *chars,
569
size_t length, intN type, void *closure);
571
intN externalType() const {
572
JS_ASSERT(isExternal());
573
JS_ASSERT(d.s.u2.externalType < TYPE_LIMIT);
574
return d.s.u2.externalType;
577
void *externalClosure() const {
578
JS_ASSERT(isExternal());
579
return d.s.u3.externalClosure;
582
static const uintN TYPE_LIMIT = 8;
583
static JSStringFinalizeOp str_finalizers[TYPE_LIMIT];
585
static intN changeFinalizer(JSStringFinalizeOp oldop,
586
JSStringFinalizeOp newop) {
587
for (uintN i = 0; i != JS_ARRAY_LENGTH(str_finalizers); i++) {
588
if (str_finalizers[i] == oldop) {
589
str_finalizers[i] = newop;
596
/* Only called by the GC for strings with the FINALIZE_EXTERNAL_STRING kind. */
598
void finalize(JSContext *cx);
602
JS_STATIC_ASSERT(sizeof(JSExternalString) == sizeof(JSString));
604
class JSAtom : public JSFixedString
607
/* Exposed only for jits. */
609
static const size_t UNIT_STATIC_LIMIT = 256U;
610
static const size_t SMALL_CHAR_LIMIT = 128U; /* Bigger chars cannot be in a length-2 string. */
611
static const size_t NUM_SMALL_CHARS = 64U;
612
static const size_t INT_STATIC_LIMIT = 256U;
613
static const size_t NUM_HUNDRED_STATICS = 156U;
616
# pragma align 8 (__1cGJSAtomPunitStaticTable_, __1cGJSAtomSlength2StaticTable_, __1cGJSAtomShundredStaticTable_)
618
static const JSString::Data unitStaticTable[];
619
static const JSString::Data length2StaticTable[];
620
static const JSString::Data hundredStaticTable[];
621
static const JSString::Data *const intStaticTable[];
624
/* Defined in jsgcinlines.h */
625
static inline bool isUnitString(const void *ptr);
626
static inline bool isLength2String(const void *ptr);
627
static inline bool isHundredString(const void *ptr);
629
typedef uint8 SmallChar;
630
static const SmallChar INVALID_SMALL_CHAR = -1;
632
static inline bool fitsInSmallChar(jschar c);
634
static const jschar fromSmallChar[];
635
static const SmallChar toSmallChar[];
637
static void staticAsserts() {
638
JS_STATIC_ASSERT(sizeof(JSString::Data) == sizeof(JSString));
641
static JSStaticAtom &length2Static(jschar c1, jschar c2);
642
static JSStaticAtom &length2Static(uint32 i);
646
* While this query can be used for any pointer to GC thing, given a
647
* JSString 'str', it is more efficient to use 'str->isStaticAtom()'.
649
static inline bool isStatic(const void *ptr);
651
static inline bool hasIntStatic(int32 i);
652
static inline JSStaticAtom &intStatic(jsint i);
654
static inline bool hasUnitStatic(jschar c);
655
static JSStaticAtom &unitStatic(jschar c);
657
/* May not return atom, returns null on (reported) failure. */
658
static inline JSLinearString *getUnitStringForElement(JSContext *cx, JSString *str, size_t index);
660
/* Return null if no static atom exists for the given (chars, length). */
661
static inline JSStaticAtom *lookupStatic(const jschar *chars, size_t length);
663
inline void finalize(JSRuntime *rt);
666
JS_STATIC_ASSERT(sizeof(JSAtom) == sizeof(JSString));
668
class JSInlineAtom : public JSInlineString /*, JSAtom */
671
* JSInlineAtom is not explicitly used and is only present for consistency.
672
* See Atomize() for how JSInlineStrings get morphed into JSInlineAtoms.
676
JS_STATIC_ASSERT(sizeof(JSInlineAtom) == sizeof(JSInlineString));
678
class JSShortAtom : public JSShortString /*, JSInlineAtom */
681
* JSShortAtom is not explicitly used and is only present for consistency.
682
* See Atomize() for how JSShortStrings get morphed into JSShortAtoms.
686
JS_STATIC_ASSERT(sizeof(JSShortAtom) == sizeof(JSShortString));
688
class JSStaticAtom : public JSAtom
691
JS_STATIC_ASSERT(sizeof(JSStaticAtom) == sizeof(JSString));
693
/* Avoid requiring jsstrinlines.h just to call getChars. */
695
JS_ALWAYS_INLINE const jschar *
696
JSString::getChars(JSContext *cx)
698
if (JSLinearString *str = ensureLinear(cx))
703
JS_ALWAYS_INLINE const jschar *
704
JSString::getCharsZ(JSContext *cx)
706
if (JSFlatString *str = ensureFlat(cx))
711
JS_ALWAYS_INLINE JSLinearString *
712
JSString::ensureLinear(JSContext *cx)
716
: asRope().flatten(cx);
719
JS_ALWAYS_INLINE JSFlatString *
720
JSString::ensureFlat(JSContext *cx)
725
? asDependent().undepend(cx)
726
: asRope().flatten(cx);
729
JS_ALWAYS_INLINE JSFixedString *
730
JSString::ensureFixed(JSContext *cx)
734
if (isExtensible()) {
735
JS_ASSERT((d.lengthAndFlags & FLAT_MASK) == 0);
736
JS_STATIC_ASSERT(EXTENSIBLE_FLAGS == (JS_BIT(2) | JS_BIT(3)));
737
JS_STATIC_ASSERT(FIXED_FLAGS == JS_BIT(2));
738
d.lengthAndFlags ^= JS_BIT(3);
745
/* Implemented in jsstrinlines.h */
749
* When an algorithm does not need a string represented as a single linear
750
* array of characters, this range utility may be used to traverse the string as a
751
* sequence of linear arrays of characters. This avoids flattening ropes.
753
* Implemented in jsstrinlines.h.
755
class StringSegmentRange;
756
class MutatingRopeSegmentRange;
759
* Utility for building a rope (lazy concatenation) of strings.
765
extern JSString * JS_FASTCALL
766
js_ConcatStrings(JSContext *cx, JSString *s1, JSString *s2);
768
extern JSString * JS_FASTCALL
769
js_toLowerCase(JSContext *cx, JSString *str);
771
extern JSString * JS_FASTCALL
772
js_toUpperCase(JSContext *cx, JSString *str);
779
extern jschar js_empty_ucstr[];
780
extern JSSubString js_EmptySubString;
782
/* Unicode character attribute lookup tables. */
783
extern const uint8 js_X[];
784
extern const uint8 js_Y[];
785
extern const uint32 js_A[];
787
/* Enumerated Unicode general category types. */
788
typedef enum JSCharType {
790
JSCT_UPPERCASE_LETTER = 1,
791
JSCT_LOWERCASE_LETTER = 2,
792
JSCT_TITLECASE_LETTER = 3,
793
JSCT_MODIFIER_LETTER = 4,
794
JSCT_OTHER_LETTER = 5,
795
JSCT_NON_SPACING_MARK = 6,
796
JSCT_ENCLOSING_MARK = 7,
797
JSCT_COMBINING_SPACING_MARK = 8,
798
JSCT_DECIMAL_DIGIT_NUMBER = 9,
799
JSCT_LETTER_NUMBER = 10,
800
JSCT_OTHER_NUMBER = 11,
801
JSCT_SPACE_SEPARATOR = 12,
802
JSCT_LINE_SEPARATOR = 13,
803
JSCT_PARAGRAPH_SEPARATOR = 14,
806
JSCT_PRIVATE_USE = 18,
808
JSCT_DASH_PUNCTUATION = 20,
809
JSCT_START_PUNCTUATION = 21,
810
JSCT_END_PUNCTUATION = 22,
811
JSCT_CONNECTOR_PUNCTUATION = 23,
812
JSCT_OTHER_PUNCTUATION = 24,
813
JSCT_MATH_SYMBOL = 25,
814
JSCT_CURRENCY_SYMBOL = 26,
815
JSCT_MODIFIER_SYMBOL = 27,
816
JSCT_OTHER_SYMBOL = 28
819
/* Character classifying and mapping macros, based on java.lang.Character. */
820
#define JS_CCODE(c) (js_A[js_Y[(js_X[(uint16)(c)>>6]<<6)|((c)&0x3F)]])
821
#define JS_CTYPE(c) (JS_CCODE(c) & 0x1F)
823
#define JS_ISALPHA(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
824
(1 << JSCT_LOWERCASE_LETTER) | \
825
(1 << JSCT_TITLECASE_LETTER) | \
826
(1 << JSCT_MODIFIER_LETTER) | \
827
(1 << JSCT_OTHER_LETTER)) \
830
#define JS_ISALNUM(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
831
(1 << JSCT_LOWERCASE_LETTER) | \
832
(1 << JSCT_TITLECASE_LETTER) | \
833
(1 << JSCT_MODIFIER_LETTER) | \
834
(1 << JSCT_OTHER_LETTER) | \
835
(1 << JSCT_DECIMAL_DIGIT_NUMBER)) \
838
/* A unicode letter, suitable for use in an identifier. */
839
#define JS_ISLETTER(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
840
(1 << JSCT_LOWERCASE_LETTER) | \
841
(1 << JSCT_TITLECASE_LETTER) | \
842
(1 << JSCT_MODIFIER_LETTER) | \
843
(1 << JSCT_OTHER_LETTER) | \
844
(1 << JSCT_LETTER_NUMBER)) \
848
* 'IdentifierPart' from ECMA grammar, is Unicode letter or combining mark or
849
* digit or connector punctuation.
851
#define JS_ISIDPART(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
852
(1 << JSCT_LOWERCASE_LETTER) | \
853
(1 << JSCT_TITLECASE_LETTER) | \
854
(1 << JSCT_MODIFIER_LETTER) | \
855
(1 << JSCT_OTHER_LETTER) | \
856
(1 << JSCT_LETTER_NUMBER) | \
857
(1 << JSCT_NON_SPACING_MARK) | \
858
(1 << JSCT_COMBINING_SPACING_MARK) | \
859
(1 << JSCT_DECIMAL_DIGIT_NUMBER) | \
860
(1 << JSCT_CONNECTOR_PUNCTUATION)) \
863
/* Unicode control-format characters, ignored in input */
864
#define JS_ISFORMAT(c) (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1)
867
* This table is used in JS_ISWORD. The definition has external linkage to
868
* allow the raw table data to be used in the regular expression compiler.
870
extern const bool js_alnum[];
873
* This macro performs testing for the regular expression word class \w, which
874
* is defined by ECMA-262 15.10.2.6 to be [0-9A-Z_a-z]. If we want a
875
* Unicode-friendlier definition of "word", we should rename this macro to
876
* something regexp-y.
878
/*
 * The mask test accepts exactly the values 0..127. Unlike a plain
 * "(c) < 128" comparison it also rejects negative arguments, which would
 * otherwise be used as a negative index into js_alnum.
 */
#define JS_ISWORD(c) ((((c) & ~0x7F) == 0) && js_alnum[(c)])
880
extern const bool js_isidstart[];
881
extern const bool js_isident[];
888
return (w < 128) ? js_isidstart[w] : JS_ISLETTER(c);
896
return (w < 128) ? js_isident[w] : JS_ISIDPART(c);
899
#define JS_ISXMLSPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\r' || \
901
#define JS_ISXMLNSSTART(c) ((JS_CCODE(c) & 0x00000100) || (c) == '_')
902
#define JS_ISXMLNS(c) ((JS_CCODE(c) & 0x00000080) || (c) == '.' || \
903
(c) == '-' || (c) == '_')
904
#define JS_ISXMLNAMESTART(c) (JS_ISXMLNSSTART(c) || (c) == ':')
905
#define JS_ISXMLNAME(c) (JS_ISXMLNS(c) || (c) == ':')
907
#define JS_ISDIGIT(c) (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER)
909
const jschar BYTE_ORDER_MARK = 0xFEFF;
910
const jschar NO_BREAK_SPACE = 0x00A0;
912
extern const bool js_isspace[];
921
: w == NO_BREAK_SPACE || w == BYTE_ORDER_MARK ||
922
(JS_CCODE(w) & 0x00070000) == 0x00040000;
926
JS_ISSPACE_OR_BOM(int c)
930
/* Treat little- and big-endian BOMs as whitespace for compatibility. */
933
: w == NO_BREAK_SPACE || w == BYTE_ORDER_MARK ||
934
(JS_CCODE(w) & 0x00070000) == 0x00040000 || w == 0xfffe || w == 0xfeff;
937
/*
 * True iff c is a printable 7-bit ASCII character. The mask test admits only
 * 0..127, so negative values (for which isprint is undefined unless equal to
 * EOF) and values of 128 or more never reach isprint.
 */
#define JS_ISPRINT(c) ((((c) & ~0x7F) == 0) && isprint(c))
939
#define JS_ISUPPER(c) (JS_CTYPE(c) == JSCT_UPPERCASE_LETTER)
940
#define JS_ISLOWER(c) (JS_CTYPE(c) == JSCT_LOWERCASE_LETTER)
942
#define JS_TOUPPER(c) ((jschar) ((JS_CCODE(c) & 0x00100000) \
943
? (c) - ((int32)JS_CCODE(c) >> 22) \
945
#define JS_TOLOWER(c) ((jschar) ((JS_CCODE(c) & 0x00200000) \
946
? (c) + ((int32)JS_CCODE(c) >> 22) \
950
* Shorthands for ASCII (7-bit) decimal and hex conversion.
951
* Manually inline isdigit for performance; MSVC doesn't do this for us.
953
/* True iff c is one of '0'..'9'; unsigned wraparound rejects anything below '0'. */
#define JS7_ISDEC(c) ((unsigned)(c) - (unsigned)'0' < 10u)
954
/* True iff c is one of '1'..'9' (a non-zero decimal digit). */
#define JS7_ISDECNZ(c) ((unsigned)(c) - (unsigned)'1' < 9u)
955
/* Numeric value of the decimal digit c; performs no range check itself. */
#define JS7_UNDEC(c) ((c) - '0')
956
/*
 * True iff c is a 7-bit ASCII hexadecimal digit. The mask test admits only
 * 0..127, so negative values (for which isxdigit is undefined unless equal to
 * EOF) and values of 128 or more never reach isxdigit.
 */
#define JS7_ISHEX(c) ((((c) & ~0x7F) == 0) && isxdigit(c))
957
/*
 * Numeric value of the hex digit c, cast to uintN: decimal digits yield
 * c - '0', any other argument yields 10 + tolower(c) - 'a'. The macro does
 * not itself verify that c is a hex digit.
 */
#define JS7_UNHEX(c) (uintN)(JS7_ISDEC(c) ? (c) - '0' : 10 + tolower(c) - 'a')
958
/*
 * True iff c is a 7-bit ASCII letter. The mask test admits only 0..127, so
 * negative values (for which isalpha is undefined unless equal to EOF) and
 * values of 128 or more never reach isalpha.
 */
#define JS7_ISLET(c) ((((c) & ~0x7F) == 0) && isalpha(c))
960
/* Initialize the String class, returning its prototype object. */
961
extern js::Class js_StringClass;
964
JSObject::isString() const
966
return getClass() == &js_StringClass;
970
js_InitStringClass(JSContext *cx, JSObject *obj);
972
extern const char js_escape_str[];
973
extern const char js_unescape_str[];
974
extern const char js_uneval_str[];
975
extern const char js_decodeURI_str[];
976
extern const char js_encodeURI_str[];
977
extern const char js_decodeURIComponent_str[];
978
extern const char js_encodeURIComponent_str[];
980
/* GC-allocate a string descriptor for the given malloc-allocated chars. */
981
extern JSFixedString *
982
js_NewString(JSContext *cx, jschar *chars, size_t length);
984
extern JSLinearString *
985
js_NewDependentString(JSContext *cx, JSString *base, size_t start, size_t length);
987
/* Copy a counted string and GC-allocate a descriptor for it. */
988
extern JSFixedString *
989
js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n);
991
extern JSFixedString *
992
js_NewStringCopyN(JSContext *cx, const char *s, size_t n);
994
/* Copy a C string and GC-allocate a descriptor for it. */
995
extern JSFixedString *
996
js_NewStringCopyZ(JSContext *cx, const jschar *s);
998
extern JSFixedString *
999
js_NewStringCopyZ(JSContext *cx, const char *s);
1002
* Convert a value to a printable C string.
1005
js_ValueToPrintable(JSContext *cx, const js::Value &,
1006
JSAutoByteString *bytes, bool asSource = false);
1009
* Convert a value to a string, returning null after reporting an error,
1010
* otherwise returning a new string reference.
1013
js_ValueToString(JSContext *cx, const js::Value &v);
1018
* Most code that calls js_ValueToString knows the value is (probably) not a
1019
* string, so it does not make sense to put this inline fast path into
1022
static JS_ALWAYS_INLINE JSString *
1023
ValueToString_TestForStringInline(JSContext *cx, const Value &v)
1026
return v.toString();
1027
return js_ValueToString(cx, v);
1031
* This function implements E-262-3 section 9.8, toString. Convert the given
1032
* value to a string of jschars appended to the given buffer. On error, the
1033
* passed buffer may have partial results appended.
1036
ValueToStringBuffer(JSContext *cx, const Value &v, StringBuffer &sb);
1038
} /* namespace js */
1041
* Convert a value to its source expression, returning null after reporting
1042
* an error, otherwise returning a new string reference.
1044
extern JS_FRIEND_API(JSString *)
1045
js_ValueToSource(JSContext *cx, const js::Value &v);
1050
* Compute a hash function from str. The caller can call this function even if
1051
* str is not a GC-allocated thing.
1054
HashChars(const jschar *chars, size_t length)
1057
for (; length; chars++, length--)
1058
h = JS_ROTATE_LEFT32(h, 4) ^ *chars;
1063
* Test if strings are equal. The caller can call the function even if str1
1064
* or str2 are not GC-allocated things.
1067
EqualStrings(JSContext *cx, JSString *str1, JSString *str2, JSBool *result);
1069
/* EqualStrings is infallible on linear strings. */
1071
EqualStrings(JSLinearString *str1, JSLinearString *str2);
1074
* Return less than, equal to, or greater than zero depending on whether
1075
* str1 is less than, equal to, or greater than str2.
1078
CompareStrings(JSContext *cx, JSString *str1, JSString *str2, int32 *result);
1081
* Return true if the string matches the given sequence of ASCII bytes.
1084
StringEqualsAscii(JSLinearString *str, const char *asciiBytes);
1089
js_strlen(const jschar *s);
1092
js_strchr(const jschar *s, jschar c);
1095
js_strchr_limit(const jschar *s, jschar c, const jschar *limit);
1097
#define js_strncpy(t, s, n) memcpy((t), (s), (n) * sizeof(jschar))
1100
* Return s advanced past any Unicode white space characters.
1102
static inline const jschar *
1103
js_SkipWhiteSpace(const jschar *s, const jschar *end)
1105
JS_ASSERT(s <= end);
1106
while (s != end && JS_ISSPACE(*s))
1112
* Some string functions have an optional bool useCESU8 argument.
1113
* CESU-8 (Compatibility Encoding Scheme for UTF-16: 8-bit) is a
1114
* variant of UTF-8 that allows us to store any wide character
1115
* string as a narrow character string. For strings containing
1116
* mostly ascii, it saves space.
1117
* http://www.unicode.org/reports/tr26/
1121
* Inflate bytes to JS chars and vice versa. Report out of memory via cx and
1122
* return null on error, otherwise return the jschar or byte vector that was
1123
* JS_malloc'ed. length is updated to the length of the new string in jschars.
1124
* Using useCESU8 = true treats 'bytes' as CESU-8.
1127
js_InflateString(JSContext *cx, const char *bytes, size_t *length, bool useCESU8 = false);
1130
js_DeflateString(JSContext *cx, const jschar *chars, size_t length);
1133
* Inflate bytes to JS chars into a buffer. 'chars' must be large enough for
1134
* 'length' jschars. The buffer is NOT null-terminated. The destination length
1135
* must be initialized with the buffer size and will contain on return the
1136
* number of copied chars. Conversion behavior depends on js_CStringsAreUTF8.
1139
js_InflateStringToBuffer(JSContext *cx, const char *bytes, size_t length,
1140
jschar *chars, size_t *charsLength);
1143
* Same as js_InflateStringToBuffer, but treats 'bytes' as UTF-8 or CESU-8.
1146
js_InflateUTF8StringToBuffer(JSContext *cx, const char *bytes, size_t length,
1147
jschar *chars, size_t *charsLength,
1148
bool useCESU8 = false);
1151
* Get number of bytes in the deflated sequence of characters. Behavior depends
1152
* on js_CStringsAreUTF8.
1155
js_GetDeflatedStringLength(JSContext *cx, const jschar *chars,
1156
size_t charsLength);
1159
* Same as js_GetDeflatedStringLength, but treats the result as UTF-8 or CESU-8.
1160
* This function will never fail (return -1) in CESU-8 mode.
1163
js_GetDeflatedUTF8StringLength(JSContext *cx, const jschar *chars,
1164
size_t charsLength, bool useCESU8 = false);
1167
* Deflate JS chars to bytes into a buffer. 'bytes' must be large enough for
1168
* 'length' chars. The buffer is NOT null-terminated. The destination length
1169
* must be initialized with the buffer size and will contain on return the
1170
* number of copied bytes. Conversion behavior depends on js_CStringsAreUTF8.
1173
js_DeflateStringToBuffer(JSContext *cx, const jschar *chars,
1174
size_t charsLength, char *bytes, size_t *length);
1177
* Same as js_DeflateStringToBuffer, but treats 'bytes' as UTF-8 or CESU-8.
1180
js_DeflateStringToUTF8Buffer(JSContext *cx, const jschar *chars,
1181
size_t charsLength, char *bytes, size_t *length,
1182
bool useCESU8 = false);
1184
/* Export a few natives and a helper to other files in SpiderMonkey. */
1186
js_str_escape(JSContext *cx, uintN argc, js::Value *argv, js::Value *rval);
1189
* The String.prototype.replace fast-native entry point is exported for joined
1190
* function optimization in js{interp,tracer}.cpp.
1194
str_replace(JSContext *cx, uintN argc, js::Value *vp);
1198
js_str_toString(JSContext *cx, uintN argc, js::Value *vp);
1201
js_str_charAt(JSContext *cx, uintN argc, js::Value *vp);
1204
js_str_charCodeAt(JSContext *cx, uintN argc, js::Value *vp);
1207
* Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
1208
* least 6 bytes long. Return the number of UTF-8 bytes of data written.
1211
js_OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char);
1216
PutEscapedStringImpl(char *buffer, size_t size, FILE *fp, JSLinearString *str, uint32 quote);
1219
* Write str into buffer escaping any non-printable or non-ASCII character
1220
* using \escapes for JS string literals.
1221
* Guarantees that a NUL is at the end of the buffer unless size is 0. Returns
1222
* the length of the written output, NOT including the NUL. Thus, a return
1223
* value of size or more means that the output was truncated. If buffer
1224
* is null, just returns the length of the output. If quote is not 0, it must
1225
* be a single or double quote character that will quote the output.
1228
PutEscapedString(char *buffer, size_t size, JSLinearString *str, uint32 quote)
1230
size_t n = PutEscapedStringImpl(buffer, size, NULL, str, quote);
1232
/* PutEscapedStringImpl can only fail with a file. */
1233
JS_ASSERT(n != size_t(-1));
1238
* Write str into file escaping any non-printable or non-ASCII character.
1239
* If quote is not 0, it must be a single or double quote character that
1240
* will quote the output.
1243
FileEscapedString(FILE *fp, JSLinearString *str, uint32 quote)
1245
return PutEscapedStringImpl(NULL, 0, fp, str, quote) != size_t(-1);
1248
} /* namespace js */
1251
js_String(JSContext *cx, uintN argc, js::Value *vp);
1253
#endif /* jsstr_h___ */