1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2
* vim: set ts=4 sw=4 et tw=79 ft=cpp:
4
* This Source Code Form is subject to the terms of the Mozilla Public
5
* License, v. 2.0. If a copy of the MPL was not distributed with this
6
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8
#include "mozilla/RangedPtr.h"
10
#include "gc/Marking.h"
13
#include "String-inl.h"
15
#include "jsobjinlines.h"
17
using namespace mozilla;
21
JSString::isShort() const
23
bool is_short = (getAllocKind() == gc::FINALIZE_SHORT_STRING);
24
JS_ASSERT_IF(is_short, isFixed());
29
JSString::isFixed() const
31
return isFlat() && !isExtensible();
35
JSString::isInline() const
37
return isFixed() && (d.u1.chars == d.inlineStorage || isShort());
41
JSString::isExternal() const
43
bool is_external = (getAllocKind() == gc::FINALIZE_EXTERNAL_STRING);
44
JS_ASSERT_IF(is_external, isFixed());
49
JSString::sizeOfExcludingThis(JSMallocSizeOfFun mallocSizeOf)
51
// JSRope: do nothing, we'll count all children chars when we hit the leaf strings.
55
JS_ASSERT(isLinear());
57
// JSDependentString: do nothing, we'll count the chars when we hit the base string.
63
// JSExtensibleString: count the full capacity, not just the used space.
65
JSExtensibleString &extensible = asExtensible();
66
return mallocSizeOf(extensible.chars());
71
// JSExternalString: don't count, the chars could be stored anywhere.
75
// JSInlineString, JSShortString, JSInlineAtom, JSShortAtom: the chars are inline.
79
// JSAtom, JSFixedString, JSUndependedString: measure the space for the
80
// chars. For JSUndependedString, there is no need to count the base
81
// string, for the same reason as JSDependentString above.
82
JSFixedString &fixed = asFixed();
83
return mallocSizeOf(fixed.chars());
90
if (const jschar *chars = getChars(NULL)) {
91
fprintf(stderr, "JSString* (%p) = jschar * (%p) = ",
92
(void *) this, (void *) chars);
94
extern void DumpChars(const jschar *s, size_t n);
95
DumpChars(chars, length());
97
fprintf(stderr, "(oom in JSString::dump)");
103
JSString::equals(const char *s)
105
const jschar *c = getChars(NULL);
107
fprintf(stderr, "OOM in JSString::equals!\n");
120
static JS_ALWAYS_INLINE bool
121
AllocChars(JSContext *maybecx, size_t length, jschar **chars, size_t *capacity)
124
* String length doesn't include the null char, so include it here before
125
* doubling. Adding the null char after doubling would interact poorly with
126
* round-up malloc schemes.
128
size_t numChars = length + 1;
131
* Grow by 12.5% if the buffer is very large. Otherwise, round up to the
132
* next power of 2. This is similar to what we do with arrays; see
133
* JSObject::ensureDenseArrayElements.
135
static const size_t DOUBLING_MAX = 1024 * 1024;
136
numChars = numChars > DOUBLING_MAX ? numChars + (numChars / 8) : RoundUpPow2(numChars);
138
/* Like length, capacity does not include the null char, so take it out. */
139
*capacity = numChars - 1;
141
JS_STATIC_ASSERT(JSString::MAX_LENGTH * sizeof(jschar) < UINT32_MAX);
142
size_t bytes = numChars * sizeof(jschar);
143
*chars = (jschar *)(maybecx ? maybecx->malloc_(bytes) : OffTheBooks::malloc_(bytes));
144
return *chars != NULL;
147
template<JSRope::UsingBarrier b>
149
JSRope::flattenInternal(JSContext *maybecx)
152
* Perform a depth-first dag traversal, splatting each node's characters
153
* into a contiguous buffer. Visit each rope node three times:
154
* 1. record position in the buffer and recurse into left child;
155
* 2. recurse into the right child;
156
* 3. transform the node into a dependent string.
157
* To avoid maintaining a stack, tree nodes are mutated to indicate how many
158
* times they have been visited. Since ropes can be dags, a node may be
159
* encountered multiple times during traversal. However, step 3 above leaves
160
* a valid dependent string, so everything works out. This algorithm is
161
* homomorphic to marking code.
163
* While ropes avoid all sorts of quadratic cases with string
164
* concatenation, they can't help when ropes are immediately flattened.
165
* One idiomatic case that we'd like to keep linear (and has traditionally
166
* been linear in SM and other JS engines) is:
173
* To do this, when the buffer for a to-be-flattened rope is allocated, the
174
* allocation size is rounded up. Then, if the resulting flat string is the
175
* left-hand side of a new rope that gets flattened and there is enough
176
* capacity, the rope is flattened into the same buffer, thereby avoiding
177
* copying the left-hand side. Clearing the 'extensible' bit turns off this
178
* optimization. This is necessary, e.g., when the JSAPI hands out the raw
179
* null-terminated char array of a flat string.
181
* N.B. This optimization can create chains of dependent strings.
183
const size_t wholeLength = length();
184
size_t wholeCapacity;
186
JSString *str = this;
188
JSCompartment *comp = compartment();
190
if (this->leftChild()->isExtensible()) {
191
JSExtensibleString &left = this->leftChild()->asExtensible();
192
size_t capacity = left.capacity();
193
if (capacity >= wholeLength) {
194
if (b == WithIncrementalBarrier) {
195
JSString::writeBarrierPre(d.u1.left);
196
JSString::writeBarrierPre(d.s.u2.right);
199
wholeCapacity = capacity;
200
wholeChars = const_cast<jschar *>(left.chars());
201
size_t bits = left.d.lengthAndFlags;
202
pos = wholeChars + (bits >> LENGTH_SHIFT);
203
JS_STATIC_ASSERT(!(EXTENSIBLE_FLAGS & DEPENDENT_FLAGS));
204
left.d.lengthAndFlags = bits ^ (EXTENSIBLE_FLAGS | DEPENDENT_FLAGS);
205
left.d.s.u2.base = (JSLinearString *)this; /* will be true on exit */
206
StringWriteBarrierPostRemove(comp, &left.d.u1.left);
207
StringWriteBarrierPost(comp, (JSString **)&left.d.s.u2.base);
208
goto visit_right_child;
212
if (!AllocChars(maybecx, wholeLength, &wholeChars, &wholeCapacity))
217
if (b == WithIncrementalBarrier) {
218
JSString::writeBarrierPre(str->d.u1.left);
219
JSString::writeBarrierPre(str->d.s.u2.right);
222
JSString &left = *str->d.u1.left;
223
str->d.u1.chars = pos;
224
StringWriteBarrierPostRemove(comp, &str->d.u1.left);
226
left.d.s.u3.parent = str; /* Return to this when 'left' done, */
227
left.d.lengthAndFlags = 0x200; /* but goto visit_right_child. */
229
goto first_visit_node;
231
size_t len = left.length();
232
PodCopy(pos, left.d.u1.chars, len);
236
JSString &right = *str->d.s.u2.right;
237
if (right.isRope()) {
238
right.d.s.u3.parent = str; /* Return to this node when 'right' done, */
239
right.d.lengthAndFlags = 0x300; /* but goto finish_node. */
241
goto first_visit_node;
243
size_t len = right.length();
244
PodCopy(pos, right.d.u1.chars, len);
249
JS_ASSERT(pos == wholeChars + wholeLength);
251
str->d.lengthAndFlags = buildLengthAndFlags(wholeLength, EXTENSIBLE_FLAGS);
252
str->d.u1.chars = wholeChars;
253
str->d.s.u2.capacity = wholeCapacity;
254
StringWriteBarrierPostRemove(comp, &str->d.u1.left);
255
StringWriteBarrierPostRemove(comp, &str->d.s.u2.right);
256
return &this->asFlat();
258
size_t progress = str->d.lengthAndFlags;
259
str->d.lengthAndFlags = buildLengthAndFlags(pos - str->d.u1.chars, DEPENDENT_FLAGS);
260
str->d.s.u2.base = (JSLinearString *)this; /* will be true on exit */
261
StringWriteBarrierPost(comp, (JSString **)&str->d.s.u2.base);
262
str = str->d.s.u3.parent;
263
if (progress == 0x200)
264
goto visit_right_child;
265
JS_ASSERT(progress == 0x300);
271
JSRope::flatten(JSContext *maybecx)
274
if (compartment()->needsBarrier())
275
return flattenInternal<WithIncrementalBarrier>(maybecx);
277
return flattenInternal<NoBarrier>(maybecx);
279
return flattenInternal<NoBarrier>(maybecx);
283
JSString * JS_FASTCALL
284
js_ConcatStrings(JSContext *cx, HandleString left, HandleString right)
286
JS_ASSERT_IF(!left->isAtom(), left->compartment() == cx->compartment);
287
JS_ASSERT_IF(!right->isAtom(), right->compartment() == cx->compartment);
289
size_t leftLen = left->length();
293
size_t rightLen = right->length();
297
size_t wholeLength = leftLen + rightLen;
298
if (!JSString::validateLength(cx, wholeLength))
301
if (JSShortString::lengthFits(wholeLength)) {
302
JSShortString *str = js_NewGCShortString(cx);
305
const jschar *leftChars = left->getChars(cx);
308
const jschar *rightChars = right->getChars(cx);
312
jschar *buf = str->init(wholeLength);
313
PodCopy(buf, leftChars, leftLen);
314
PodCopy(buf + leftLen, rightChars, rightLen);
315
buf[wholeLength] = 0;
319
return JSRope::new_(cx, left, right, wholeLength);
323
JSDependentString::undepend(JSContext *cx)
325
JS_ASSERT(JSString::isDependent());
328
* We destroy the base() pointer in undepend, so we need a pre-barrier. We
329
* don't need a post-barrier because there aren't any outgoing pointers
332
JSString::writeBarrierPre(base());
335
size_t size = (n + 1) * sizeof(jschar);
336
jschar *s = (jschar *) cx->malloc_(size);
340
PodCopy(s, chars(), n);
345
* Transform *this into an undepended string so 'base' will remain rooted
346
* for the benefit of any other dependent string that depends on *this.
348
d.lengthAndFlags = buildLengthAndFlags(n, UNDEPENDED_FLAGS);
350
return &this->asFixed();
354
JSFlatString::isIndexSlow(uint32_t *indexp) const
356
const jschar *s = charsZ();
363
if (n > UINT32_CHAR_BUFFER_LENGTH)
367
* Make sure to account for the '\0' at the end of characters, dereferenced
370
RangedPtr<const jschar> cp(s, n + 1);
371
const RangedPtr<const jschar> end(s + n, s, n + 1);
373
uint32_t index = JS7_UNDEC(*cp++);
374
uint32_t oldIndex = 0;
378
while (JS7_ISDEC(*cp)) {
381
index = 10 * index + c;
386
/* It's not an element if there are characters after the number. */
391
* Look out for "4294967296" and larger-number strings that fit in
392
* UINT32_CHAR_BUFFER_LENGTH: only unsigned 32-bit integers shall pass.
394
if (oldIndex < UINT32_MAX / 10 || (oldIndex == UINT32_MAX / 10 && c <= (UINT32_MAX % 10))) {
403
* Set up some tools to make it easier to generate large tables. After constant
404
* folding, for each n, Rn(0) is the comma-separated list R(0), R(1), ..., R(2^n-1).
405
* Similary, Rn(k) (for any k and n) generates the list R(k), R(k+1), ..., R(k+2^n-1).
406
* To use this, define R appropriately, then use Rn(0) (for some value of n), then
409
#define R2(n) R(n), R((n) + (1 << 0)), R((n) + (2 << 0)), R((n) + (3 << 0))
410
#define R4(n) R2(n), R2((n) + (1 << 2)), R2((n) + (2 << 2)), R2((n) + (3 << 2))
411
#define R6(n) R4(n), R4((n) + (1 << 4)), R4((n) + (2 << 4)), R4((n) + (3 << 4))
412
#define R7(n) R6(n), R6((n) + (1 << 6))
415
* This is used when we generate our table of short strings, so the compiler is
416
* happier if we use |c| as few times as possible.
418
#define FROM_SMALL_CHAR(c) jschar((c) + ((c) < 10 ? '0' : \
419
(c) < 36 ? 'a' - 10 : \
423
* Declare length-2 strings. We only store strings where both characters are
424
* alphanumeric. The lower 10 short chars are the numerals, the next 26 are
425
* the lowercase letters, and the next 26 are the uppercase letters.
427
#define TO_SMALL_CHAR(c) ((c) >= '0' && (c) <= '9' ? (c) - '0' : \
428
(c) >= 'a' && (c) <= 'z' ? (c) - 'a' + 10 : \
429
(c) >= 'A' && (c) <= 'Z' ? (c) - 'A' + 36 : \
430
StaticStrings::INVALID_SMALL_CHAR)
432
#define R TO_SMALL_CHAR
433
const StaticStrings::SmallChar StaticStrings::toSmallChar[] = { R7(0) };
437
StaticStrings::init(JSContext *cx)
439
AutoEnterAtomsCompartment ac(cx);
441
for (uint32_t i = 0; i < UNIT_STATIC_LIMIT; i++) {
442
jschar buffer[] = { jschar(i), '\0' };
443
JSFixedString *s = js_NewStringCopyN(cx, buffer, 1);
446
unitStaticTable[i] = s->morphAtomizedStringIntoAtom();
449
for (uint32_t i = 0; i < NUM_SMALL_CHARS * NUM_SMALL_CHARS; i++) {
450
jschar buffer[] = { FROM_SMALL_CHAR(i >> 6), FROM_SMALL_CHAR(i & 0x3F), '\0' };
451
JSFixedString *s = js_NewStringCopyN(cx, buffer, 2);
454
length2StaticTable[i] = s->morphAtomizedStringIntoAtom();
457
for (uint32_t i = 0; i < INT_STATIC_LIMIT; i++) {
459
intStaticTable[i] = unitStaticTable[i + '0'];
460
} else if (i < 100) {
461
size_t index = ((size_t)TO_SMALL_CHAR((i / 10) + '0') << 6) +
462
TO_SMALL_CHAR((i % 10) + '0');
463
intStaticTable[i] = length2StaticTable[index];
465
jschar buffer[] = { jschar('0' + (i / 100)),
466
jschar('0' + ((i / 10) % 10)),
467
jschar('0' + (i % 10)),
469
JSFixedString *s = js_NewStringCopyN(cx, buffer, 3);
472
intStaticTable[i] = s->morphAtomizedStringIntoAtom();
480
StaticStrings::trace(JSTracer *trc)
482
/* These strings never change, so barriers are not needed. */
484
for (uint32_t i = 0; i < UNIT_STATIC_LIMIT; i++) {
485
if (unitStaticTable[i])
486
MarkStringUnbarriered(trc, &unitStaticTable[i], "unit-static-string");
489
for (uint32_t i = 0; i < NUM_SMALL_CHARS * NUM_SMALL_CHARS; i++) {
490
if (length2StaticTable[i])
491
MarkStringUnbarriered(trc, &length2StaticTable[i], "length2-static-string");
494
/* This may mark some strings more than once, but so be it. */
495
for (uint32_t i = 0; i < INT_STATIC_LIMIT; i++) {
496
if (intStaticTable[i])
497
MarkStringUnbarriered(trc, &intStaticTable[i], "int-static-string");
502
StaticStrings::isStatic(JSAtom *atom)
504
const jschar *chars = atom->chars();
505
switch (atom->length()) {
507
return (chars[0] < UNIT_STATIC_LIMIT);
509
return (fitsInSmallChar(chars[0]) && fitsInSmallChar(chars[1]));
511
if ('1' <= chars[0] && chars[0] <= '9' &&
512
'0' <= chars[1] && chars[1] <= '9' &&
513
'0' <= chars[2] && chars[2] <= '9') {
514
int i = (chars[0] - '0') * 100 +
515
(chars[1] - '0') * 10 +
518
return (unsigned(i) < INT_STATIC_LIMIT);
530
fprintf(stderr, "JSAtom* (%p) = ", (void *) this);
531
this->JSString::dump();