1
// Copyright 2012 the V8 project authors. All rights reserved.
2
// Redistribution and use in source and binary forms, with or without
3
// modification, are permitted provided that the following conditions are
6
// * Redistributions of source code must retain the above copyright
7
// notice, this list of conditions and the following disclaimer.
8
// * Redistributions in binary form must reproduce the above
9
// copyright notice, this list of conditions and the following
10
// disclaimer in the documentation and/or other materials provided
11
// with the distribution.
12
// * Neither the name of Google Inc. nor the names of its
13
// contributors may be used to endorse or promote products derived
14
// from this software without specific prior written permission.
16
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
#include "assembler.h"
31
#include "regexp-stack.h"
32
#include "regexp-macro-assembler.h"
33
#include "simulator.h"
38
// Base-class constructor. Starts with the slow-safe compiler flag cleared and
// the global mode set to NOT_GLOBAL.
// NOTE(review): the rest of the initializer list and the constructor body are
// not visible in this listing; presumably `zone` is stored in a member --
// confirm against the full file.
RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
39
: slow_safe_compiler_(false),
40
global_mode_(NOT_GLOBAL),
45
// Base-class destructor.
// NOTE(review): the body is not visible in this listing.
RegExpMacroAssembler::~RegExpMacroAssembler() {
49
// Reports whether unaligned memory reads may be used. The answer is fixed at
// compile time by whether V8_HOST_CAN_READ_UNALIGNED is defined (a property
// of the host, in contrast to the NativeRegExpMacroAssembler override, which
// keys on the target).
// NOTE(review): the branches of the #ifdef are not visible in this listing;
// presumably true when the macro is defined and false otherwise -- confirm.
bool RegExpMacroAssembler::CanReadUnaligned() {
50
#ifdef V8_HOST_CAN_READ_UNALIGNED
58
#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
60
// Constructor for the native-code macro assembler. Forwards `zone` to the
// RegExpMacroAssembler base-class constructor.
// NOTE(review): the constructor body is not visible in this listing.
NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Zone* zone)
61
: RegExpMacroAssembler(zone) {
65
// Destructor for the native-code macro assembler.
// NOTE(review): the body is not visible in this listing.
NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
69
// Reports whether unaligned memory reads may be used by native regexp code.
// Unlike the base-class version, this one is keyed on
// V8_TARGET_CAN_READ_UNALIGNED, i.e. a property of the target rather than
// the host.
// NOTE(review): the branches of the #ifdef are not visible in this listing;
// presumably true when the macro is defined and false otherwise -- confirm.
bool NativeRegExpMacroAssembler::CanReadUnaligned() {
70
#ifdef V8_TARGET_CAN_READ_UNALIGNED
77
// Returns a pointer to the character at `start_index` inside the character
// storage of `subject`.
//
// `subject` must already be a sequential or external string (asserted), and
// `start_index` must lie in [0, subject->length()] (asserted; an index equal
// to the length yields the one-past-the-end position).
//
// ASCII strings are addressed through a byte pointer, so the index is added
// directly. For the non-ASCII case the index is added to `data` before the
// cast to byte*, so it is scaled by the element size of `data`.
// NOTE(review): the parameter list, the declarations of `address` and `data`,
// and the else-branches are not visible in this listing; `data` in the second
// half is presumably a two-byte (uc16) pointer -- confirm.
const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
80
// Not just flat, but ultra flat.
81
ASSERT(subject->IsExternalString() || subject->IsSeqString());
82
ASSERT(start_index >= 0);
83
ASSERT(start_index <= subject->length());
84
if (subject->IsAsciiRepresentation()) {
86
// One-byte case: fetch the character storage from either the external or
// the sequential ASCII string.
if (StringShape(subject).IsExternal()) {
87
const char* data = ExternalAsciiString::cast(subject)->GetChars();
88
address = reinterpret_cast<const byte*>(data);
90
ASSERT(subject->IsSeqAsciiString());
91
char* data = SeqAsciiString::cast(subject)->GetChars();
92
address = reinterpret_cast<const byte*>(data);
94
return address + start_index;
97
// Remaining case: fetch the character storage from either the external or
// the sequential two-byte string.
if (StringShape(subject).IsExternal()) {
98
data = ExternalTwoByteString::cast(subject)->GetChars();
100
ASSERT(subject->IsSeqTwoByteString());
101
data = SeqTwoByteString::cast(subject)->GetChars();
103
// Index first (in units of `data`'s element type), then convert to byte*.
return reinterpret_cast<const byte*>(data + start_index);
107
// Prepares the input for a run of compiled regexp code and delegates to
// Execute().
//
// `subject` must be flat and `previous_index` must lie in
// [0, subject->length()] (both asserted). The function:
//   1. unwraps a flattened cons string (taking its first part) or a sliced
//      string (taking its parent and remembering the slice offset);
//   2. computes the byte range [input_start, input_end) covering the
//      characters from `previous_index` to the end of the subject, using one
//      byte per character for ASCII strings and two otherwise;
//   3. calls Execute() with the code object and the computed range.
// NOTE(review): several parameters (including `previous_index`), most of the
// Execute() argument list and the return statement are not visible in this
// listing; presumably `res` is returned -- confirm.
NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
108
Handle<Code> regexp_code,
109
Handle<String> subject,
111
int offsets_vector_length,
115
ASSERT(subject->IsFlat());
116
ASSERT(previous_index >= 0);
117
ASSERT(previous_index <= subject->length());
119
// No allocations before calling the regexp, but we can't use
120
// AssertNoAllocation, since regexps might be preempted, and another thread
121
// might do allocation anyway.
123
String* subject_ptr = *subject;
124
// Character offsets into string.
125
int start_offset = previous_index;
126
// Number of characters from the start offset to the end of the subject;
// computed before unwrapping, so it refers to the original string's length.
int char_length = subject_ptr->length() - start_offset;
127
int slice_offset = 0;
129
// The string has been flattened, so if it is a cons string it contains the
130
// full string in the first part.
131
if (StringShape(subject_ptr).IsCons()) {
132
ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
133
subject_ptr = ConsString::cast(subject_ptr)->first();
134
} else if (StringShape(subject_ptr).IsSliced()) {
135
SlicedString* slice = SlicedString::cast(subject_ptr);
136
subject_ptr = slice->parent();
137
// Positions in the parent are shifted by the slice's offset.
slice_offset = slice->offset();
139
// Ensure that an underlying string has the same ASCII-ness.
140
bool is_ascii = subject_ptr->IsAsciiRepresentation();
141
ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
142
// String is now either Sequential or External
143
// Shift that converts a character count into a byte count (x1 or x2).
int char_size_shift = is_ascii ? 0 : 1;
145
const byte* input_start =
146
StringCharacterPosition(subject_ptr, start_offset + slice_offset);
147
int byte_length = char_length << char_size_shift;
148
const byte* input_end = input_start + byte_length;
149
Result res = Execute(*regexp_code,
155
offsets_vector_length,
161
// Invokes generated regexp code through CALL_GENERATED_REGEXP_CODE and
// returns its integer result converted to Result.
//
// A RegExpStackScope is opened for the duration of the call and its stack
// base is read before the generated code is entered. If the generated code
// reports EXCEPTION while the isolate has no pending exception, the failure
// is treated as a backtrack-stack overflow and isolate->StackOverflow() is
// called to create the exception.
//
// `isolate` must be the current isolate (asserted).
// NOTE(review): part of the parameter list and the arguments passed to
// CALL_GENERATED_REGEXP_CODE are not visible in this listing.
NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
163
String* input, // This needs to be the unpacked (sliced, cons) string.
165
const byte* input_start,
166
const byte* input_end,
170
ASSERT(isolate == Isolate::Current());
171
// Ensure that the minimum stack has been allocated.
172
RegExpStackScope stack_scope(isolate);
173
Address stack_base = stack_scope.stack()->stack_base();
176
int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
186
// Results below RETRY are not expected from generated code.
ASSERT(result >= RETRY);
188
if (result == EXCEPTION && !isolate->has_pending_exception()) {
189
// We detected a stack overflow (on the backtrack stack) in RegExp code,
190
// but haven't created the exception yet.
191
isolate->StackOverflow();
193
return static_cast<Result>(result);
197
// Lookup table indexed by character code, eight entries per row. An entry is
// 0xff for the characters called out in the row comments -- the digits
// '0'-'9', the letters 'A'-'Z' and 'a'-'z', and '_' -- and 0x00 for every
// other code in the visible range.
// NOTE(review): the closing of the initializer is not visible in this
// listing, so the total table size cannot be confirmed from here.
const byte NativeRegExpMacroAssembler::word_character_map[] = {
198
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
200
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
201
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
203
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
204
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
205
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
206
0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
208
0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
209
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
210
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
211
0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
213
0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
214
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
215
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
216
0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
220
// Compares two UC16 (two-byte) character sequences of `byte_length` bytes,
// starting at `byte_offset1` and `byte_offset2`, ignoring case. Each pair of
// characters is run through the isolate's Ecma262Canonicalize mapping and the
// canonical forms are compared.
//
// `byte_length` must be even (asserted) and `isolate` must be the current
// isolate (asserted).
// NOTE(review): the remaining parameters, the return statements and the
// closing braces are not visible in this listing; presumably a mismatch
// yields 0 and a full match yields a non-zero value -- confirm.
int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
221
Address byte_offset1,
222
Address byte_offset2,
225
ASSERT(isolate == Isolate::Current());
226
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
227
isolate->regexp_macro_assembler_canonicalize();
228
// This function is not allowed to cause a garbage collection.
229
// A GC might move the calling generated code and invalidate the
230
// return address on the stack.
231
ASSERT(byte_length % 2 == 0);
232
uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
233
uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
234
// Two bytes per character: convert the byte count to a character count.
size_t length = byte_length >> 1;
236
for (size_t i = 0; i < length; i++) {
237
unibrow::uchar c1 = substring1[i];
238
unibrow::uchar c2 = substring2[i];
240
// The output buffer is pre-seeded with the input character.
// NOTE(review): presumably so it keeps the original value when the mapping
// writes nothing -- confirm against unibrow::Mapping::get.
unibrow::uchar s1[1] = { c1 };
241
canonicalize->get(c1, '\0', s1);
243
unibrow::uchar s2[1] = { c2 };
244
canonicalize->get(c2, '\0', s2);
245
if (s1[0] != s2[0]) {
255
// Grows the isolate's regexp stack to twice its current capacity and returns
// the stack pointer translated to the new stack.
//
// On entry `*stack_base` must equal the regexp stack's current base and
// `stack_pointer` must lie at or below it, within the current capacity (all
// asserted). On success `*stack_base` is updated to the new base and the
// returned pointer sits the same distance below the new base as
// `stack_pointer` sat below the old one.
// NOTE(review): the remaining parameters and the body of the
// `new_stack_base == NULL` branch are not visible in this listing; presumably
// that branch returns early to signal failure -- confirm.
Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
258
ASSERT(isolate == Isolate::Current());
259
RegExpStack* regexp_stack = isolate->regexp_stack();
260
size_t size = regexp_stack->stack_capacity();
261
Address old_stack_base = regexp_stack->stack_base();
262
ASSERT(old_stack_base == *stack_base);
263
ASSERT(stack_pointer <= old_stack_base);
264
ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
265
Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
266
if (new_stack_base == NULL) {
269
*stack_base = new_stack_base;
270
// Distance between the old base and the stack pointer, i.e. the amount of
// stack currently in use; the same distance is applied to the new base.
intptr_t stack_content_size = old_stack_base - stack_pointer;
271
return new_stack_base - stack_content_size;
274
#endif // V8_INTERPRETED_REGEXP
276
} } // namespace v8::internal