~oif-team/ubuntu/natty/qt4-x11/xi2.1

« back to all changes in this revision

Viewing changes to src/3rdparty/webkit/JavaScriptCore/yarr/RegexJIT.cpp

  • Committer: Bazaar Package Importer
  • Author(s): Alessandro Ghersi
  • Date: 2009-11-02 18:30:08 UTC
  • mfrom: (1.2.2 upstream)
  • mto: (15.2.5 experimental)
  • mto: This revision was merged to the branch mainline in revision 88.
  • Revision ID: james.westby@ubuntu.com-20091102183008-b6a4gcs128mvfb3m
Tags: upstream-4.6.0~beta1
ImportĀ upstreamĀ versionĀ 4.6.0~beta1

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright (C) 2009 Apple Inc. All rights reserved.
 
3
 *
 
4
 * Redistribution and use in source and binary forms, with or without
 
5
 * modification, are permitted provided that the following conditions
 
6
 * are met:
 
7
 * 1. Redistributions of source code must retain the above copyright
 
8
 *    notice, this list of conditions and the following disclaimer.
 
9
 * 2. Redistributions in binary form must reproduce the above copyright
 
10
 *    notice, this list of conditions and the following disclaimer in the
 
11
 *    documentation and/or other materials provided with the distribution.
 
12
 *
 
13
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 
14
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 
15
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 
16
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 
17
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 
18
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 
19
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 
20
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 
21
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 
22
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 
23
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 
24
 */
 
25
 
 
26
#include "config.h"
 
27
#include "RegexJIT.h"
 
28
 
 
29
#include "ASCIICType.h"
 
30
#include "JSGlobalData.h"
 
31
#include "LinkBuffer.h"
 
32
#include "MacroAssembler.h"
 
33
#include "RegexCompiler.h"
 
34
 
 
35
#include "pcre.h" // temporary, remove when fallback is removed.
 
36
 
 
37
#if ENABLE(YARR_JIT)
 
38
 
 
39
using namespace WTF;
 
40
 
 
41
namespace JSC { namespace Yarr {
 
42
 
 
43
 
 
44
class RegexGenerator : private MacroAssembler {
 
45
    friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
 
46
 
 
47
#if PLATFORM(ARM)
 
48
    static const RegisterID input = ARMRegisters::r0;
 
49
    static const RegisterID index = ARMRegisters::r1;
 
50
    static const RegisterID length = ARMRegisters::r2;
 
51
    static const RegisterID output = ARMRegisters::r4;
 
52
 
 
53
    static const RegisterID regT0 = ARMRegisters::r5;
 
54
    static const RegisterID regT1 = ARMRegisters::r6;
 
55
 
 
56
    static const RegisterID returnRegister = ARMRegisters::r0;
 
57
#elif PLATFORM(X86)
 
58
    static const RegisterID input = X86Registers::eax;
 
59
    static const RegisterID index = X86Registers::edx;
 
60
    static const RegisterID length = X86Registers::ecx;
 
61
    static const RegisterID output = X86Registers::edi;
 
62
 
 
63
    static const RegisterID regT0 = X86Registers::ebx;
 
64
    static const RegisterID regT1 = X86Registers::esi;
 
65
 
 
66
    static const RegisterID returnRegister = X86Registers::eax;
 
67
#elif PLATFORM(X86_64)
 
68
    static const RegisterID input = X86Registers::edi;
 
69
    static const RegisterID index = X86Registers::esi;
 
70
    static const RegisterID length = X86Registers::edx;
 
71
    static const RegisterID output = X86Registers::ecx;
 
72
 
 
73
    static const RegisterID regT0 = X86Registers::eax;
 
74
    static const RegisterID regT1 = X86Registers::ebx;
 
75
 
 
76
    static const RegisterID returnRegister = X86Registers::eax;
 
77
#endif
 
78
 
 
79
    void optimizeAlternative(PatternAlternative* alternative)
 
80
    {
 
81
        if (!alternative->m_terms.size())
 
82
            return;
 
83
 
 
84
        for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
 
85
            PatternTerm& term = alternative->m_terms[i];
 
86
            PatternTerm& nextTerm = alternative->m_terms[i + 1];
 
87
 
 
88
            if ((term.type == PatternTerm::TypeCharacterClass)
 
89
                && (term.quantityType == QuantifierFixedCount)
 
90
                && (nextTerm.type == PatternTerm::TypePatternCharacter)
 
91
                && (nextTerm.quantityType == QuantifierFixedCount)) {
 
92
                PatternTerm termCopy = term;
 
93
                alternative->m_terms[i] = nextTerm;
 
94
                alternative->m_terms[i + 1] = termCopy;
 
95
            }
 
96
        }
 
97
    }
 
98
 
 
99
    void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
 
100
    {
 
101
        do {
 
102
            // pick which range we're going to generate
 
103
            int which = count >> 1;
 
104
            char lo = ranges[which].begin;
 
105
            char hi = ranges[which].end;
 
106
            
 
107
            // check if there are any ranges or matches below lo.  If not, just jl to failure -
 
108
            // if there is anything else to check, check that first, if it falls through jmp to failure.
 
109
            if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
 
110
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
 
111
                
 
112
                // generate code for all ranges before this one
 
113
                if (which)
 
114
                    matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
 
115
                
 
116
                while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
 
117
                    matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
 
118
                    ++*matchIndex;
 
119
                }
 
120
                failures.append(jump());
 
121
 
 
122
                loOrAbove.link(this);
 
123
            } else if (which) {
 
124
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
 
125
 
 
126
                matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
 
127
                failures.append(jump());
 
128
 
 
129
                loOrAbove.link(this);
 
130
            } else
 
131
                failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
 
132
 
 
133
            while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
 
134
                ++*matchIndex;
 
135
 
 
136
            matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
 
137
            // fall through to here, the value is above hi.
 
138
 
 
139
            // shuffle along & loop around if there are any more matches to handle.
 
140
            unsigned next = which + 1;
 
141
            ranges += next;
 
142
            count -= next;
 
143
        } while (count);
 
144
    }
 
145
 
 
146
    void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
 
147
    {
 
148
        Jump unicodeFail;
 
149
        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
 
150
            Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
 
151
        
 
152
            if (charClass->m_matchesUnicode.size()) {
 
153
                for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
 
154
                    UChar ch = charClass->m_matchesUnicode[i];
 
155
                    matchDest.append(branch32(Equal, character, Imm32(ch)));
 
156
                }
 
157
            }
 
158
            
 
159
            if (charClass->m_rangesUnicode.size()) {
 
160
                for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
 
161
                    UChar lo = charClass->m_rangesUnicode[i].begin;
 
162
                    UChar hi = charClass->m_rangesUnicode[i].end;
 
163
                    
 
164
                    Jump below = branch32(LessThan, character, Imm32(lo));
 
165
                    matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
 
166
                    below.link(this);
 
167
                }
 
168
            }
 
169
 
 
170
            unicodeFail = jump();
 
171
            isAscii.link(this);
 
172
        }
 
173
 
 
174
        if (charClass->m_ranges.size()) {
 
175
            unsigned matchIndex = 0;
 
176
            JumpList failures; 
 
177
            matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
 
178
            while (matchIndex < charClass->m_matches.size())
 
179
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
 
180
 
 
181
            failures.link(this);
 
182
        } else if (charClass->m_matches.size()) {
 
183
            // optimization: gather 'a','A' etc back together, can mask & test once.
 
184
            Vector<char> matchesAZaz;
 
185
 
 
186
            for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
 
187
                char ch = charClass->m_matches[i];
 
188
                if (m_pattern.m_ignoreCase) {
 
189
                    if (isASCIILower(ch)) {
 
190
                        matchesAZaz.append(ch);
 
191
                        continue;
 
192
                    }
 
193
                    if (isASCIIUpper(ch))
 
194
                        continue;
 
195
                }
 
196
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
 
197
            }
 
198
 
 
199
            if (unsigned countAZaz = matchesAZaz.size()) {
 
200
                or32(Imm32(32), character);
 
201
                for (unsigned i = 0; i < countAZaz; ++i)
 
202
                    matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
 
203
            }
 
204
        }
 
205
 
 
206
        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
 
207
            unicodeFail.link(this);
 
208
    }
 
209
 
 
210
    // Jumps if input not available; will have (incorrectly) incremented already!
 
211
    Jump jumpIfNoAvailableInput(unsigned countToCheck)
 
212
    {
 
213
        add32(Imm32(countToCheck), index);
 
214
        return branch32(Above, index, length);
 
215
    }
 
216
 
 
217
    Jump jumpIfAvailableInput(unsigned countToCheck)
 
218
    {
 
219
        add32(Imm32(countToCheck), index);
 
220
        return branch32(BelowOrEqual, index, length);
 
221
    }
 
222
 
 
223
    Jump checkInput()
 
224
    {
 
225
        return branch32(BelowOrEqual, index, length);
 
226
    }
 
227
 
 
228
    Jump atEndOfInput()
 
229
    {
 
230
        return branch32(Equal, index, length);
 
231
    }
 
232
 
 
233
    Jump notAtEndOfInput()
 
234
    {
 
235
        return branch32(NotEqual, index, length);
 
236
    }
 
237
 
 
238
    Jump jumpIfCharEquals(UChar ch, int inputPosition)
 
239
    {
 
240
        return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
 
241
    }
 
242
 
 
243
    Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
 
244
    {
 
245
        return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
 
246
    }
 
247
 
 
248
    void readCharacter(int inputPosition, RegisterID reg)
 
249
    {
 
250
        load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
 
251
    }
 
252
 
 
253
    void storeToFrame(RegisterID reg, unsigned frameLocation)
 
254
    {
 
255
        poke(reg, frameLocation);
 
256
    }
 
257
 
 
258
    void storeToFrame(Imm32 imm, unsigned frameLocation)
 
259
    {
 
260
        poke(imm, frameLocation);
 
261
    }
 
262
 
 
263
    DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
 
264
    {
 
265
        return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
 
266
    }
 
267
 
 
268
    void loadFromFrame(unsigned frameLocation, RegisterID reg)
 
269
    {
 
270
        peek(reg, frameLocation);
 
271
    }
 
272
 
 
273
    void loadFromFrameAndJump(unsigned frameLocation)
 
274
    {
 
275
        jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
 
276
    }
 
277
 
 
278
    struct AlternativeBacktrackRecord {
 
279
        DataLabelPtr dataLabel;
 
280
        Label backtrackLocation;
 
281
 
 
282
        AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
 
283
            : dataLabel(dataLabel)
 
284
            , backtrackLocation(backtrackLocation)
 
285
        {
 
286
        }
 
287
    };
 
288
 
 
289
    struct TermGenerationState {
 
290
        TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
 
291
            : disjunction(disjunction)
 
292
            , checkedTotal(checkedTotal)
 
293
        {
 
294
        }
 
295
 
 
296
        void resetAlternative()
 
297
        {
 
298
            isBackTrackGenerated = false;
 
299
            alt = 0;
 
300
        }
 
301
        bool alternativeValid()
 
302
        {
 
303
            return alt < disjunction->m_alternatives.size();
 
304
        }
 
305
        void nextAlternative()
 
306
        {
 
307
            ++alt;
 
308
        }
 
309
        PatternAlternative* alternative()
 
310
        {
 
311
            return disjunction->m_alternatives[alt];
 
312
        }
 
313
 
 
314
        void resetTerm()
 
315
        {
 
316
            ASSERT(alternativeValid());
 
317
            t = 0;
 
318
        }
 
319
        bool termValid()
 
320
        {
 
321
            ASSERT(alternativeValid());
 
322
            return t < alternative()->m_terms.size();
 
323
        }
 
324
        void nextTerm()
 
325
        {
 
326
            ASSERT(alternativeValid());
 
327
            ++t;
 
328
        }
 
329
        PatternTerm& term()
 
330
        {
 
331
            ASSERT(alternativeValid());
 
332
            return alternative()->m_terms[t];
 
333
        }
 
334
 
 
335
        PatternTerm& lookaheadTerm()
 
336
        {
 
337
            ASSERT(alternativeValid());
 
338
            ASSERT((t + 1) < alternative()->m_terms.size());
 
339
            return alternative()->m_terms[t + 1];
 
340
        }
 
341
        bool isSinglePatternCharacterLookaheadTerm()
 
342
        {
 
343
            ASSERT(alternativeValid());
 
344
            return ((t + 1) < alternative()->m_terms.size())
 
345
                && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
 
346
                && (lookaheadTerm().quantityType == QuantifierFixedCount)
 
347
                && (lookaheadTerm().quantityCount == 1);
 
348
        }
 
349
 
 
350
        int inputOffset()
 
351
        {
 
352
            return term().inputPosition - checkedTotal;
 
353
        }
 
354
 
 
355
        void jumpToBacktrack(Jump jump, MacroAssembler* masm)
 
356
        {
 
357
            if (isBackTrackGenerated)
 
358
                jump.linkTo(backtrackLabel, masm);
 
359
            else
 
360
                backTrackJumps.append(jump);
 
361
        }
 
362
        void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm)
 
363
        {
 
364
            if (isBackTrackGenerated)
 
365
                jumps.linkTo(backtrackLabel, masm);
 
366
            else
 
367
                backTrackJumps.append(jumps);
 
368
        }
 
369
        bool plantJumpToBacktrackIfExists(MacroAssembler* masm)
 
370
        {
 
371
            if (isBackTrackGenerated) {
 
372
                masm->jump(backtrackLabel);
 
373
                return true;
 
374
            }
 
375
            return false;
 
376
        }
 
377
        void addBacktrackJump(Jump jump)
 
378
        {
 
379
            backTrackJumps.append(jump);
 
380
        }
 
381
        void setBacktrackGenerated(Label label)
 
382
        {
 
383
            isBackTrackGenerated = true;
 
384
            backtrackLabel = label;
 
385
        }
 
386
        void linkAlternativeBacktracks(MacroAssembler* masm)
 
387
        {
 
388
            isBackTrackGenerated = false;
 
389
            backTrackJumps.link(masm);
 
390
        }
 
391
        void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm)
 
392
        {
 
393
            isBackTrackGenerated = false;
 
394
            backTrackJumps.linkTo(label, masm);
 
395
        }
 
396
        void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm)
 
397
        {
 
398
            jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm);
 
399
            if (nestedParenthesesState.isBackTrackGenerated)
 
400
                setBacktrackGenerated(nestedParenthesesState.backtrackLabel);
 
401
        }
 
402
 
 
403
        PatternDisjunction* disjunction;
 
404
        int checkedTotal;
 
405
    private:
 
406
        unsigned alt;
 
407
        unsigned t;
 
408
        JumpList backTrackJumps;
 
409
        Label backtrackLabel;
 
410
        bool isBackTrackGenerated;
 
411
    };
 
412
 
 
413
    void generateAssertionBOL(TermGenerationState& state)
 
414
    {
 
415
        PatternTerm& term = state.term();
 
416
 
 
417
        if (m_pattern.m_multiline) {
 
418
            const RegisterID character = regT0;
 
419
 
 
420
            JumpList matchDest;
 
421
            if (!term.inputPosition)
 
422
                matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal)));
 
423
 
 
424
            readCharacter(state.inputOffset() - 1, character);
 
425
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
 
426
            state.jumpToBacktrack(jump(), this);
 
427
 
 
428
            matchDest.link(this);
 
429
        } else {
 
430
            // Erk, really should poison out these alternatives early. :-/
 
431
            if (term.inputPosition)
 
432
                state.jumpToBacktrack(jump(), this);
 
433
            else
 
434
                state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this);
 
435
        }
 
436
    }
 
437
 
 
438
    void generateAssertionEOL(TermGenerationState& state)
 
439
    {
 
440
        PatternTerm& term = state.term();
 
441
 
 
442
        if (m_pattern.m_multiline) {
 
443
            const RegisterID character = regT0;
 
444
 
 
445
            JumpList matchDest;
 
446
            if (term.inputPosition == state.checkedTotal)
 
447
                matchDest.append(atEndOfInput());
 
448
 
 
449
            readCharacter(state.inputOffset(), character);
 
450
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
 
451
            state.jumpToBacktrack(jump(), this);
 
452
 
 
453
            matchDest.link(this);
 
454
        } else {
 
455
            if (term.inputPosition == state.checkedTotal)
 
456
                state.jumpToBacktrack(notAtEndOfInput(), this);
 
457
            // Erk, really should poison out these alternatives early. :-/
 
458
            else
 
459
                state.jumpToBacktrack(jump(), this);
 
460
        }
 
461
    }
 
462
 
 
463
    // Also falls though on nextIsNotWordChar.
 
464
    void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
 
465
    {
 
466
        const RegisterID character = regT0;
 
467
        PatternTerm& term = state.term();
 
468
 
 
469
        if (term.inputPosition == state.checkedTotal)
 
470
            nextIsNotWordChar.append(atEndOfInput());
 
471
 
 
472
        readCharacter(state.inputOffset(), character);
 
473
        matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
 
474
    }
 
475
 
 
476
    void generateAssertionWordBoundary(TermGenerationState& state)
 
477
    {
 
478
        const RegisterID character = regT0;
 
479
        PatternTerm& term = state.term();
 
480
 
 
481
        Jump atBegin;
 
482
        JumpList matchDest;
 
483
        if (!term.inputPosition)
 
484
            atBegin = branch32(Equal, index, Imm32(state.checkedTotal));
 
485
        readCharacter(state.inputOffset() - 1, character);
 
486
        matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
 
487
        if (!term.inputPosition)
 
488
            atBegin.link(this);
 
489
 
 
490
        // We fall through to here if the last character was not a wordchar.
 
491
        JumpList nonWordCharThenWordChar;
 
492
        JumpList nonWordCharThenNonWordChar;
 
493
        if (term.invertOrCapture) {
 
494
            matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
 
495
            nonWordCharThenWordChar.append(jump());
 
496
        } else {
 
497
            matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
 
498
            nonWordCharThenNonWordChar.append(jump());
 
499
        }
 
500
        state.jumpToBacktrack(nonWordCharThenNonWordChar, this);
 
501
 
 
502
        // We jump here if the last character was a wordchar.
 
503
        matchDest.link(this);
 
504
        JumpList wordCharThenWordChar;
 
505
        JumpList wordCharThenNonWordChar;
 
506
        if (term.invertOrCapture) {
 
507
            matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
 
508
            wordCharThenWordChar.append(jump());
 
509
        } else {
 
510
            matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar);
 
511
            // This can fall-though!
 
512
        }
 
513
 
 
514
        state.jumpToBacktrack(wordCharThenWordChar, this);
 
515
        
 
516
        nonWordCharThenWordChar.link(this);
 
517
        wordCharThenNonWordChar.link(this);
 
518
    }
 
519
 
 
520
    void generatePatternCharacterSingle(TermGenerationState& state)
 
521
    {
 
522
        const RegisterID character = regT0;
 
523
        UChar ch = state.term().patternCharacter;
 
524
 
 
525
        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
 
526
            readCharacter(state.inputOffset(), character);
 
527
            or32(Imm32(32), character);
 
528
            state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
 
529
        } else {
 
530
            ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
 
531
            state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this);
 
532
        }
 
533
    }
 
534
 
 
535
    void generatePatternCharacterPair(TermGenerationState& state)
 
536
    {
 
537
        const RegisterID character = regT0;
 
538
        UChar ch1 = state.term().patternCharacter;
 
539
        UChar ch2 = state.lookaheadTerm().patternCharacter;
 
540
 
 
541
        int mask = 0;
 
542
        int chPair = ch1 | (ch2 << 16);
 
543
        
 
544
        if (m_pattern.m_ignoreCase) {
 
545
            if (isASCIIAlpha(ch1))
 
546
                mask |= 32;
 
547
            if (isASCIIAlpha(ch2))
 
548
                mask |= 32 << 16;
 
549
        }
 
550
 
 
551
        if (mask) {
 
552
            load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
 
553
            or32(Imm32(mask), character);
 
554
            state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this);
 
555
        } else
 
556
            state.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this);
 
557
    }
 
558
 
 
559
    void generatePatternCharacterFixed(TermGenerationState& state)
 
560
    {
 
561
        const RegisterID character = regT0;
 
562
        const RegisterID countRegister = regT1;
 
563
        PatternTerm& term = state.term();
 
564
        UChar ch = term.patternCharacter;
 
565
 
 
566
        move(index, countRegister);
 
567
        sub32(Imm32(term.quantityCount), countRegister);
 
568
 
 
569
        Label loop(this);
 
570
        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
 
571
            load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
 
572
            or32(Imm32(32), character);
 
573
            state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
 
574
        } else {
 
575
            ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
 
576
            state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this);
 
577
        }
 
578
        add32(Imm32(1), countRegister);
 
579
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
 
580
    }
 
581
 
 
582
    void generatePatternCharacterGreedy(TermGenerationState& state)
 
583
    {
 
584
        const RegisterID character = regT0;
 
585
        const RegisterID countRegister = regT1;
 
586
        PatternTerm& term = state.term();
 
587
        UChar ch = term.patternCharacter;
 
588
    
 
589
        move(Imm32(0), countRegister);
 
590
 
 
591
        JumpList failures;
 
592
        Label loop(this);
 
593
        failures.append(atEndOfInput());
 
594
        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
 
595
            readCharacter(state.inputOffset(), character);
 
596
            or32(Imm32(32), character);
 
597
            failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
 
598
        } else {
 
599
            ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
 
600
            failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
 
601
        }
 
602
        add32(Imm32(1), countRegister);
 
603
        add32(Imm32(1), index);
 
604
        branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
 
605
        failures.append(jump());
 
606
 
 
607
        Label backtrackBegin(this);
 
608
        loadFromFrame(term.frameLocation, countRegister);
 
609
        state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
 
610
        sub32(Imm32(1), countRegister);
 
611
        sub32(Imm32(1), index);
 
612
 
 
613
        failures.link(this);
 
614
 
 
615
        storeToFrame(countRegister, term.frameLocation);
 
616
 
 
617
        state.setBacktrackGenerated(backtrackBegin);
 
618
    }
 
619
 
 
620
    void generatePatternCharacterNonGreedy(TermGenerationState& state)
 
621
    {
 
622
        const RegisterID character = regT0;
 
623
        const RegisterID countRegister = regT1;
 
624
        PatternTerm& term = state.term();
 
625
        UChar ch = term.patternCharacter;
 
626
    
 
627
        move(Imm32(0), countRegister);
 
628
 
 
629
        Jump firstTimeDoNothing = jump();
 
630
 
 
631
        Label hardFail(this);
 
632
        sub32(countRegister, index);
 
633
        state.jumpToBacktrack(jump(), this);
 
634
 
 
635
        Label backtrackBegin(this);
 
636
        loadFromFrame(term.frameLocation, countRegister);
 
637
 
 
638
        atEndOfInput().linkTo(hardFail, this);
 
639
        branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
 
640
        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
 
641
            readCharacter(state.inputOffset(), character);
 
642
            or32(Imm32(32), character);
 
643
            branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this);
 
644
        } else {
 
645
            ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
 
646
            jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this);
 
647
        }
 
648
 
 
649
        add32(Imm32(1), countRegister);
 
650
        add32(Imm32(1), index);
 
651
 
 
652
        firstTimeDoNothing.link(this);
 
653
        storeToFrame(countRegister, term.frameLocation);
 
654
 
 
655
        state.setBacktrackGenerated(backtrackBegin);
 
656
    }
 
657
 
 
658
    void generateCharacterClassSingle(TermGenerationState& state)
 
659
    {
 
660
        const RegisterID character = regT0;
 
661
        PatternTerm& term = state.term();
 
662
 
 
663
        JumpList matchDest;
 
664
        readCharacter(state.inputOffset(), character);
 
665
        matchCharacterClass(character, matchDest, term.characterClass);
 
666
 
 
667
        if (term.invertOrCapture)
 
668
            state.jumpToBacktrack(matchDest, this);
 
669
        else {
 
670
            state.jumpToBacktrack(jump(), this);
 
671
            matchDest.link(this);
 
672
        }
 
673
    }
 
674
 
 
675
    void generateCharacterClassFixed(TermGenerationState& state)
 
676
    {
 
677
        const RegisterID character = regT0;
 
678
        const RegisterID countRegister = regT1;
 
679
        PatternTerm& term = state.term();
 
680
 
 
681
        move(index, countRegister);
 
682
        sub32(Imm32(term.quantityCount), countRegister);
 
683
 
 
684
        Label loop(this);
 
685
        JumpList matchDest;
 
686
        load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
 
687
        matchCharacterClass(character, matchDest, term.characterClass);
 
688
 
 
689
        if (term.invertOrCapture)
 
690
            state.jumpToBacktrack(matchDest, this);
 
691
        else {
 
692
            state.jumpToBacktrack(jump(), this);
 
693
            matchDest.link(this);
 
694
        }
 
695
 
 
696
        add32(Imm32(1), countRegister);
 
697
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
 
698
    }
 
699
 
 
700
    void generateCharacterClassGreedy(TermGenerationState& state)
 
701
    {
 
702
        const RegisterID character = regT0;
 
703
        const RegisterID countRegister = regT1;
 
704
        PatternTerm& term = state.term();
 
705
    
 
706
        move(Imm32(0), countRegister);
 
707
 
 
708
        JumpList failures;
 
709
        Label loop(this);
 
710
        failures.append(atEndOfInput());
 
711
 
 
712
        if (term.invertOrCapture) {
 
713
            readCharacter(state.inputOffset(), character);
 
714
            matchCharacterClass(character, failures, term.characterClass);
 
715
        } else {
 
716
            JumpList matchDest;
 
717
            readCharacter(state.inputOffset(), character);
 
718
            matchCharacterClass(character, matchDest, term.characterClass);
 
719
            failures.append(jump());
 
720
            matchDest.link(this);
 
721
        }
 
722
 
 
723
        add32(Imm32(1), countRegister);
 
724
        add32(Imm32(1), index);
 
725
        branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
 
726
        failures.append(jump());
 
727
 
 
728
        Label backtrackBegin(this);
 
729
        loadFromFrame(term.frameLocation, countRegister);
 
730
        state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
 
731
        sub32(Imm32(1), countRegister);
 
732
        sub32(Imm32(1), index);
 
733
 
 
734
        failures.link(this);
 
735
 
 
736
        storeToFrame(countRegister, term.frameLocation);
 
737
 
 
738
        state.setBacktrackGenerated(backtrackBegin);
 
739
    }
 
740
 
 
741
    void generateCharacterClassNonGreedy(TermGenerationState& state)
 
742
    {
 
743
        const RegisterID character = regT0;
 
744
        const RegisterID countRegister = regT1;
 
745
        PatternTerm& term = state.term();
 
746
    
 
747
        move(Imm32(0), countRegister);
 
748
 
 
749
        Jump firstTimeDoNothing = jump();
 
750
 
 
751
        Label hardFail(this);
 
752
        sub32(countRegister, index);
 
753
        state.jumpToBacktrack(jump(), this);
 
754
 
 
755
        Label backtrackBegin(this);
 
756
        loadFromFrame(term.frameLocation, countRegister);
 
757
 
 
758
        atEndOfInput().linkTo(hardFail, this);
 
759
        branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
 
760
 
 
761
        JumpList matchDest;
 
762
        readCharacter(state.inputOffset(), character);
 
763
        matchCharacterClass(character, matchDest, term.characterClass);
 
764
 
 
765
        if (term.invertOrCapture)
 
766
            matchDest.linkTo(hardFail, this);
 
767
        else {
 
768
            jump(hardFail);
 
769
            matchDest.link(this);
 
770
        }
 
771
 
 
772
        add32(Imm32(1), countRegister);
 
773
        add32(Imm32(1), index);
 
774
 
 
775
        firstTimeDoNothing.link(this);
 
776
        storeToFrame(countRegister, term.frameLocation);
 
777
 
 
778
        state.setBacktrackGenerated(backtrackBegin);
 
779
    }
 
780
 
 
781
    void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
 
782
    {
 
783
        ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
 
784
        ASSERT(parenthesesTerm.quantityCount == 1);
 
785
    
 
786
        PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
 
787
        unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
 
788
 
 
789
        if (disjunction->m_alternatives.size() == 1) {
 
790
            state.resetAlternative();
 
791
            ASSERT(state.alternativeValid());
 
792
            PatternAlternative* alternative = state.alternative();
 
793
            optimizeAlternative(alternative);
 
794
 
 
795
            int countToCheck = alternative->m_minimumSize - preCheckedCount;
 
796
            if (countToCheck) {
 
797
                ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
 
798
 
 
799
                // FIXME: This is quite horrible.  The call to 'plantJumpToBacktrackIfExists'
 
800
                // will be forced to always trampoline into here, just to decrement the index.
 
801
                // Ick. 
 
802
                Jump skip = jump();
 
803
 
 
804
                Label backtrackBegin(this);
 
805
                sub32(Imm32(countToCheck), index);
 
806
                state.addBacktrackJump(jump());
 
807
                
 
808
                skip.link(this);
 
809
 
 
810
                state.setBacktrackGenerated(backtrackBegin);
 
811
 
 
812
                state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this);
 
813
                state.checkedTotal += countToCheck;
 
814
            }
 
815
 
 
816
            for (state.resetTerm(); state.termValid(); state.nextTerm())
 
817
                generateTerm(state);
 
818
 
 
819
            state.checkedTotal -= countToCheck;
 
820
        } else {
 
821
            JumpList successes;
 
822
 
 
823
            for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
 
824
 
 
825
                PatternAlternative* alternative = state.alternative();
 
826
                optimizeAlternative(alternative);
 
827
 
 
828
                ASSERT(alternative->m_minimumSize >= preCheckedCount);
 
829
                int countToCheck = alternative->m_minimumSize - preCheckedCount;
 
830
                if (countToCheck) {
 
831
                    state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
 
832
                    state.checkedTotal += countToCheck;
 
833
                }
 
834
 
 
835
                for (state.resetTerm(); state.termValid(); state.nextTerm())
 
836
                    generateTerm(state);
 
837
 
 
838
                // Matched an alternative.
 
839
                DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
 
840
                successes.append(jump());
 
841
 
 
842
                // Alternative did not match.
 
843
                Label backtrackLocation(this);
 
844
                
 
845
                // Can we backtrack the alternative? - if so, do so.  If not, just fall through to the next one.
 
846
                state.plantJumpToBacktrackIfExists(this);
 
847
                
 
848
                state.linkAlternativeBacktracks(this);
 
849
 
 
850
                if (countToCheck) {
 
851
                    sub32(Imm32(countToCheck), index);
 
852
                    state.checkedTotal -= countToCheck;
 
853
                }
 
854
 
 
855
                m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation));
 
856
            }
 
857
            // We fall through to here when the last alternative fails.
 
858
            // Add a backtrack out of here for the parenthese handling code to link up.
 
859
            state.addBacktrackJump(jump());
 
860
 
 
861
            // Generate a trampoline for the parens code to backtrack to, to retry the
 
862
            // next alternative.
 
863
            state.setBacktrackGenerated(label());
 
864
            loadFromFrameAndJump(alternativeFrameLocation);
 
865
 
 
866
            // FIXME: both of the above hooks are a little inefficient, in that you
 
867
            // may end up trampolining here, just to trampoline back out to the
 
868
            // parentheses code, or vice versa.  We can probably eliminate a jump
 
869
            // by restructuring, but coding this way for now for simplicity during
 
870
            // development.
 
871
 
 
872
            successes.link(this);
 
873
        }
 
874
    }
 
875
 
 
876
    void generateParenthesesSingle(TermGenerationState& state)
 
877
    {
 
878
        const RegisterID indexTemporary = regT0;
 
879
        PatternTerm& term = state.term();
 
880
        PatternDisjunction* disjunction = term.parentheses.disjunction;
 
881
        ASSERT(term.quantityCount == 1);
 
882
 
 
883
        unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
 
884
 
 
885
        unsigned parenthesesFrameLocation = term.frameLocation;
 
886
        unsigned alternativeFrameLocation = parenthesesFrameLocation;
 
887
        if (term.quantityType != QuantifierFixedCount)
 
888
            alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
 
889
 
 
890
        // optimized case - no capture & no quantifier can be handled in a light-weight manner.
 
891
        if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) {
 
892
            TermGenerationState parenthesesState(disjunction, state.checkedTotal);
 
893
            generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
 
894
            // this expects that any backtracks back out of the parentheses will be in the
 
895
            // parenthesesState's backTrackJumps vector, and that if they need backtracking
 
896
            // they will have set an entry point on the parenthesesState's backtrackLabel.
 
897
            state.propagateBacktrackingFrom(parenthesesState, this);
 
898
        } else {
 
899
            Jump nonGreedySkipParentheses;
 
900
            Label nonGreedyTryParentheses;
 
901
            if (term.quantityType == QuantifierGreedy)
 
902
                storeToFrame(Imm32(1), parenthesesFrameLocation);
 
903
            else if (term.quantityType == QuantifierNonGreedy) {
 
904
                storeToFrame(Imm32(0), parenthesesFrameLocation);
 
905
                nonGreedySkipParentheses = jump();
 
906
                nonGreedyTryParentheses = label();
 
907
                storeToFrame(Imm32(1), parenthesesFrameLocation);
 
908
            }
 
909
 
 
910
            // store the match start index
 
911
            if (term.invertOrCapture) {
 
912
                int inputOffset = state.inputOffset() - preCheckedCount;
 
913
                if (inputOffset) {
 
914
                    move(index, indexTemporary);
 
915
                    add32(Imm32(inputOffset), indexTemporary);
 
916
                    store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
 
917
                } else
 
918
                    store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
 
919
            }
 
920
 
 
921
            // generate the body of the parentheses
 
922
            TermGenerationState parenthesesState(disjunction, state.checkedTotal);
 
923
            generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
 
924
 
 
925
            // store the match end index
 
926
            if (term.invertOrCapture) {
 
927
                int inputOffset = state.inputOffset();
 
928
                if (inputOffset) {
 
929
                    move(index, indexTemporary);
 
930
                    add32(Imm32(state.inputOffset()), indexTemporary);
 
931
                    store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
 
932
                } else
 
933
                    store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
 
934
            }
 
935
            Jump success = jump();
 
936
 
 
937
            // A failure AFTER the parens jumps here
 
938
            Label backtrackFromAfterParens(this);
 
939
 
 
940
            if (term.quantityType == QuantifierGreedy) {
 
941
                // If this is zero we have now tested with both with and without the parens.
 
942
                loadFromFrame(parenthesesFrameLocation, indexTemporary);
 
943
                state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this);
 
944
            } else if (term.quantityType == QuantifierNonGreedy) {
 
945
                // If this is zero we have now tested with both with and without the parens.
 
946
                loadFromFrame(parenthesesFrameLocation, indexTemporary);
 
947
                branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this);
 
948
            }
 
949
 
 
950
            parenthesesState.plantJumpToBacktrackIfExists(this);
 
951
            // A failure WITHIN the parens jumps here
 
952
            parenthesesState.linkAlternativeBacktracks(this);
 
953
            if (term.invertOrCapture) {
 
954
                store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
 
955
                store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
 
956
            }
 
957
 
 
958
            if (term.quantityType == QuantifierGreedy)
 
959
                storeToFrame(Imm32(0), parenthesesFrameLocation);
 
960
            else
 
961
                state.jumpToBacktrack(jump(), this);
 
962
 
 
963
            state.setBacktrackGenerated(backtrackFromAfterParens);
 
964
            if (term.quantityType == QuantifierNonGreedy)
 
965
                nonGreedySkipParentheses.link(this);
 
966
            success.link(this);
 
967
        }
 
968
    }
 
969
 
 
970
    void generateParentheticalAssertion(TermGenerationState& state)
 
971
    {
 
972
        PatternTerm& term = state.term();
 
973
        PatternDisjunction* disjunction = term.parentheses.disjunction;
 
974
        ASSERT(term.quantityCount == 1);
 
975
        ASSERT(term.quantityType == QuantifierFixedCount);
 
976
 
 
977
        unsigned parenthesesFrameLocation = term.frameLocation;
 
978
        unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
 
979
 
 
980
        int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
 
981
 
 
982
        if (term.invertOrCapture) {
 
983
            // Inverted case
 
984
            storeToFrame(index, parenthesesFrameLocation);
 
985
 
 
986
            state.checkedTotal -= countCheckedAfterAssertion;
 
987
            if (countCheckedAfterAssertion)
 
988
                sub32(Imm32(countCheckedAfterAssertion), index);
 
989
 
 
990
            TermGenerationState parenthesesState(disjunction, state.checkedTotal);
 
991
            generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
 
992
            // Success! - which means - Fail!
 
993
            loadFromFrame(parenthesesFrameLocation, index);
 
994
            state.jumpToBacktrack(jump(), this);
 
995
 
 
996
            // And fail means success.
 
997
            parenthesesState.linkAlternativeBacktracks(this);
 
998
            loadFromFrame(parenthesesFrameLocation, index);
 
999
 
 
1000
            state.checkedTotal += countCheckedAfterAssertion;
 
1001
        } else {
 
1002
            // Normal case
 
1003
            storeToFrame(index, parenthesesFrameLocation);
 
1004
 
 
1005
            state.checkedTotal -= countCheckedAfterAssertion;
 
1006
            if (countCheckedAfterAssertion)
 
1007
                sub32(Imm32(countCheckedAfterAssertion), index);
 
1008
 
 
1009
            TermGenerationState parenthesesState(disjunction, state.checkedTotal);
 
1010
            generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
 
1011
            // Success! - which means - Success!
 
1012
            loadFromFrame(parenthesesFrameLocation, index);
 
1013
            Jump success = jump();
 
1014
 
 
1015
            parenthesesState.linkAlternativeBacktracks(this);
 
1016
            loadFromFrame(parenthesesFrameLocation, index);
 
1017
            state.jumpToBacktrack(jump(), this);
 
1018
 
 
1019
            success.link(this);
 
1020
 
 
1021
            state.checkedTotal += countCheckedAfterAssertion;
 
1022
        }
 
1023
    }
 
1024
 
 
1025
    void generateTerm(TermGenerationState& state)
 
1026
    {
 
1027
        PatternTerm& term = state.term();
 
1028
 
 
1029
        switch (term.type) {
 
1030
        case PatternTerm::TypeAssertionBOL:
 
1031
            generateAssertionBOL(state);
 
1032
            break;
 
1033
        
 
1034
        case PatternTerm::TypeAssertionEOL:
 
1035
            generateAssertionEOL(state);
 
1036
            break;
 
1037
        
 
1038
        case PatternTerm::TypeAssertionWordBoundary:
 
1039
            generateAssertionWordBoundary(state);
 
1040
            break;
 
1041
        
 
1042
        case PatternTerm::TypePatternCharacter:
 
1043
            switch (term.quantityType) {
 
1044
            case QuantifierFixedCount:
 
1045
                if (term.quantityCount == 1) {
 
1046
                    if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
 
1047
                        generatePatternCharacterPair(state);
 
1048
                        state.nextTerm();
 
1049
                    } else
 
1050
                        generatePatternCharacterSingle(state);
 
1051
                } else
 
1052
                    generatePatternCharacterFixed(state);
 
1053
                break;
 
1054
            case QuantifierGreedy:
 
1055
                generatePatternCharacterGreedy(state);
 
1056
                break;
 
1057
            case QuantifierNonGreedy:
 
1058
                generatePatternCharacterNonGreedy(state);
 
1059
                break;
 
1060
            }
 
1061
            break;
 
1062
 
 
1063
        case PatternTerm::TypeCharacterClass:
 
1064
            switch (term.quantityType) {
 
1065
            case QuantifierFixedCount:
 
1066
                if (term.quantityCount == 1)
 
1067
                    generateCharacterClassSingle(state);
 
1068
                else
 
1069
                    generateCharacterClassFixed(state);
 
1070
                break;
 
1071
            case QuantifierGreedy:
 
1072
                generateCharacterClassGreedy(state);
 
1073
                break;
 
1074
            case QuantifierNonGreedy:
 
1075
                generateCharacterClassNonGreedy(state);
 
1076
                break;
 
1077
            }
 
1078
            break;
 
1079
 
 
1080
        case PatternTerm::TypeBackReference:
 
1081
            m_generationFailed = true;
 
1082
            break;
 
1083
 
 
1084
        case PatternTerm::TypeForwardReference:
 
1085
            break;
 
1086
 
 
1087
        case PatternTerm::TypeParenthesesSubpattern:
 
1088
            if ((term.quantityCount == 1) && !term.parentheses.isCopy)
 
1089
                generateParenthesesSingle(state);
 
1090
            else
 
1091
                m_generationFailed = true;
 
1092
            break;
 
1093
 
 
1094
        case PatternTerm::TypeParentheticalAssertion:
 
1095
            generateParentheticalAssertion(state);
 
1096
            break;
 
1097
        }
 
1098
    }
 
1099
 
 
1100
    void generateDisjunction(PatternDisjunction* disjunction)
 
1101
    {
 
1102
        TermGenerationState state(disjunction, 0);
 
1103
        state.resetAlternative();
 
1104
 
 
1105
        // Plant a check to see if there is sufficient input available to run the first alternative.
 
1106
        // Jumping back to the label 'firstAlternative' will get to this check, jumping to
 
1107
        // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
 
1108
        // skipped this check.
 
1109
 
 
1110
        Label firstAlternative(this);
 
1111
 
 
1112
        // check availability for the next alternative
 
1113
        int countCheckedForCurrentAlternative = 0;
 
1114
        int countToCheckForFirstAlternative = 0;
 
1115
        bool hasShorterAlternatives = false;
 
1116
        JumpList notEnoughInputForPreviousAlternative;
 
1117
 
 
1118
        if (state.alternativeValid()) {
 
1119
            PatternAlternative* alternative = state.alternative();
 
1120
            countToCheckForFirstAlternative = alternative->m_minimumSize;
 
1121
            state.checkedTotal += countToCheckForFirstAlternative;
 
1122
            if (countToCheckForFirstAlternative)
 
1123
                notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
 
1124
            countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
 
1125
        }
 
1126
 
 
1127
        Label firstAlternativeInputChecked(this);
 
1128
 
 
1129
        while (state.alternativeValid()) {
 
1130
            // Track whether any alternatives are shorter than the first one.
 
1131
            hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
 
1132
 
 
1133
            PatternAlternative* alternative = state.alternative();
 
1134
            optimizeAlternative(alternative);
 
1135
 
 
1136
            for (state.resetTerm(); state.termValid(); state.nextTerm())
 
1137
                generateTerm(state);
 
1138
 
 
1139
            // If we get here, the alternative matched.
 
1140
            if (m_pattern.m_body->m_callFrameSize)
 
1141
                addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
 
1142
            
 
1143
            ASSERT(index != returnRegister);
 
1144
            if (m_pattern.m_body->m_hasFixedSize) {
 
1145
                move(index, returnRegister);
 
1146
                if (alternative->m_minimumSize)
 
1147
                    sub32(Imm32(alternative->m_minimumSize), returnRegister);
 
1148
            } else
 
1149
                pop(returnRegister);
 
1150
            store32(index, Address(output, 4));
 
1151
            store32(returnRegister, output);
 
1152
 
 
1153
            generateReturn();
 
1154
 
 
1155
            state.nextAlternative();
 
1156
 
 
1157
            // if there are any more alternatives, plant the check for input before looping.
 
1158
            if (state.alternativeValid()) {
 
1159
                PatternAlternative* nextAlternative = state.alternative();
 
1160
                int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
 
1161
 
 
1162
                if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
 
1163
                    // If we get here, there the last input checked failed.
 
1164
                    notEnoughInputForPreviousAlternative.link(this);
 
1165
 
 
1166
                    // Check if sufficent input available to run the next alternative 
 
1167
                    notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
 
1168
                    // We are now in the correct state to enter the next alternative; this add is only required
 
1169
                    // to mirror and revert operation of the sub32, just below.
 
1170
                    add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
 
1171
 
 
1172
                    // If we get here, there the last input checked passed.
 
1173
                    state.linkAlternativeBacktracks(this);
 
1174
                    // No need to check if we can run the next alternative, since it is shorter -
 
1175
                    // just update index.
 
1176
                    sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
 
1177
                } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
 
1178
                    // If we get here, there the last input checked failed.
 
1179
                    // If there is insufficient input to run the current alternative, and the next alternative is longer,
 
1180
                    // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
 
1181
                    // we had checked.
 
1182
                    notEnoughInputForPreviousAlternative.link(this);
 
1183
                    add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
 
1184
                    notEnoughInputForPreviousAlternative.append(jump());
 
1185
 
 
1186
                    // The next alternative is longer than the current one; check the difference.
 
1187
                    state.linkAlternativeBacktracks(this);
 
1188
                    notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
 
1189
                } else { // CASE 3: Both alternatives are the same length.
 
1190
                    ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
 
1191
 
 
1192
                    // If the next alterative is the same length as this one, then no need to check the input -
 
1193
                    // if there was sufficent input to run the current alternative then there is sufficient
 
1194
                    // input to run the next one; if not, there isn't.
 
1195
                    state.linkAlternativeBacktracks(this);
 
1196
                }
 
1197
 
 
1198
                state.checkedTotal -= countCheckedForCurrentAlternative;
 
1199
                countCheckedForCurrentAlternative = countToCheckForNextAlternative;
 
1200
                state.checkedTotal += countCheckedForCurrentAlternative;
 
1201
            }
 
1202
        }
 
1203
        
 
1204
        // If we get here, all Alternatives failed...
 
1205
 
 
1206
        state.checkedTotal -= countCheckedForCurrentAlternative;
 
1207
 
 
1208
        // How much more input need there be to be able to retry from the first alternative?
 
1209
        // examples:
 
1210
        //   /yarr_jit/ or /wrec|pcre/
 
1211
        //     In these examples we need check for one more input before looping.
 
1212
        //   /yarr_jit|pcre/
 
1213
        //     In this case we need check for 5 more input to loop (+4 to allow for the first alterative
 
1214
        //     being four longer than the last alternative checked, and another +1 to effectively move
 
1215
        //     the start position along by one).
 
1216
        //   /yarr|rules/ or /wrec|notsomuch/
 
1217
        //     In these examples, provided that there was sufficient input to have just been matching for
 
1218
        //     the second alternative we can loop without checking for available input (since the second
 
1219
        //     alternative is longer than the first).  In the latter example we need to decrement index
 
1220
        //     (by 4) so the start position is only progressed by 1 from the last iteration.
 
1221
        int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
 
1222
 
 
1223
        // First, deal with the cases where there was sufficient input to try the last alternative.
 
1224
        if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
 
1225
            state.linkAlternativeBacktracks(this);
 
1226
        else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
 
1227
            state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this);
 
1228
        else { // no need to check the input, but we do have some bookkeeping to do first.
 
1229
            state.linkAlternativeBacktracks(this);
 
1230
 
 
1231
            // Where necessary update our preserved start position.
 
1232
            if (!m_pattern.m_body->m_hasFixedSize) {
 
1233
                move(index, regT0);
 
1234
                sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
 
1235
                poke(regT0, m_pattern.m_body->m_callFrameSize);
 
1236
            }
 
1237
 
 
1238
            // Update index if necessary, and loop (without checking).
 
1239
            if (incrementForNextIter)
 
1240
                add32(Imm32(incrementForNextIter), index);
 
1241
            jump().linkTo(firstAlternativeInputChecked, this);
 
1242
        }
 
1243
 
 
1244
        notEnoughInputForPreviousAlternative.link(this);
 
1245
        // Update our idea of the start position, if we're tracking this.
 
1246
        if (!m_pattern.m_body->m_hasFixedSize) {
 
1247
            if (countCheckedForCurrentAlternative - 1) {
 
1248
                move(index, regT0);
 
1249
                sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
 
1250
                poke(regT0, m_pattern.m_body->m_callFrameSize);
 
1251
            } else
 
1252
                poke(index, m_pattern.m_body->m_callFrameSize);
 
1253
        }
 
1254
        // Check if there is sufficent input to run the first alternative again.
 
1255
        jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
 
1256
        // No - insufficent input to run the first alteranative, are there any other alternatives we
 
1257
        // might need to check?  If so, the last check will have left the index incremented by
 
1258
        // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
 
1259
        // LESS input is available, to have the effect of just progressing the start position by 1
 
1260
        // from the last iteration.  If this check passes we can just jump up to the check associated
 
1261
        // with the first alternative in the loop.  This is a bit sad, since we'll end up trying the
 
1262
        // first alternative again, and this check will fail (otherwise the check planted just above
 
1263
        // here would have passed).  This is a bit sad, however it saves trying to do something more
 
1264
        // complex here in compilation, and in the common case we should end up coallescing the checks.
 
1265
        //
 
1266
        // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
 
1267
        // of the minimum-alterantive-lengths.  E.g. if I have two alternatives of length 200 and 150,
 
1268
        // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
 
1269
        // is sufficient input to run either alternative (constantly failing).  If there had been only
 
1270
        // one alternative, or if the shorter alternative had come first, we would have terminated
 
1271
        // immediately. :-/
 
1272
        if (hasShorterAlternatives)
 
1273
            jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
 
1274
        // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
 
1275
        // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ... 
 
1276
        // but since we're about to return a failure this doesn't really matter!)
 
1277
 
 
1278
        unsigned frameSize = m_pattern.m_body->m_callFrameSize;
 
1279
        if (!m_pattern.m_body->m_hasFixedSize)
 
1280
            ++frameSize;
 
1281
        if (frameSize)
 
1282
            addPtr(Imm32(frameSize * sizeof(void*)), stackPointerRegister);
 
1283
 
 
1284
        move(Imm32(-1), returnRegister);
 
1285
 
 
1286
        generateReturn();
 
1287
    }
 
1288
 
 
1289
    void generateEnter()
 
1290
    {
 
1291
#if PLATFORM(X86_64)
 
1292
        push(X86Registers::ebp);
 
1293
        move(stackPointerRegister, X86Registers::ebp);
 
1294
        push(X86Registers::ebx);
 
1295
#elif PLATFORM(X86)
 
1296
        push(X86Registers::ebp);
 
1297
        move(stackPointerRegister, X86Registers::ebp);
 
1298
        // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
 
1299
        push(X86Registers::ebx);
 
1300
        push(X86Registers::edi);
 
1301
        push(X86Registers::esi);
 
1302
        // load output into edi (2 = saved ebp + return address).
 
1303
    #if COMPILER(MSVC)
 
1304
        loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
 
1305
        loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
 
1306
        loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
 
1307
        loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
 
1308
    #else
 
1309
        loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
 
1310
    #endif
 
1311
#elif PLATFORM(ARM)
 
1312
#if PLATFORM(ARM_TRADITIONAL)
 
1313
        push(ARMRegisters::lr);
 
1314
#endif
 
1315
        push(ARMRegisters::r4);
 
1316
        push(ARMRegisters::r5);
 
1317
        push(ARMRegisters::r6);
 
1318
        move(ARMRegisters::r3, output);
 
1319
#endif
 
1320
    }
 
1321
 
 
1322
    void generateReturn()
 
1323
    {
 
1324
#if PLATFORM(X86_64)
 
1325
        pop(X86Registers::ebx);
 
1326
        pop(X86Registers::ebp);
 
1327
#elif PLATFORM(X86)
 
1328
        pop(X86Registers::esi);
 
1329
        pop(X86Registers::edi);
 
1330
        pop(X86Registers::ebx);
 
1331
        pop(X86Registers::ebp);
 
1332
#elif PLATFORM(ARM)
 
1333
        pop(ARMRegisters::r6);
 
1334
        pop(ARMRegisters::r5);
 
1335
        pop(ARMRegisters::r4);
 
1336
#endif
 
1337
        ret();
 
1338
    }
 
1339
 
 
1340
public:
 
1341
    RegexGenerator(RegexPattern& pattern)
 
1342
        : m_pattern(pattern)
 
1343
        , m_generationFailed(false)
 
1344
    {
 
1345
    }
 
1346
 
 
1347
    void generate()
 
1348
    {
 
1349
        generateEnter();
 
1350
 
 
1351
        // TODO: do I really want this on the stack?
 
1352
        if (!m_pattern.m_body->m_hasFixedSize)
 
1353
            push(index);
 
1354
 
 
1355
        if (m_pattern.m_body->m_callFrameSize)
 
1356
            subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
 
1357
 
 
1358
        generateDisjunction(m_pattern.m_body);
 
1359
    }
 
1360
 
 
1361
    void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject)
 
1362
    {
 
1363
        generate();
 
1364
 
 
1365
        LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size()));
 
1366
 
 
1367
        for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
 
1368
            patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation));
 
1369
 
 
1370
        jitObject.set(patchBuffer.finalizeCode());
 
1371
    }
 
1372
 
 
1373
    bool generationFailed()
 
1374
    {
 
1375
        return m_generationFailed;
 
1376
    }
 
1377
 
 
1378
private:
 
1379
    RegexPattern& m_pattern;
 
1380
    Vector<AlternativeBacktrackRecord> m_backtrackRecords;
 
1381
    bool m_generationFailed;
 
1382
};
 
1383
 
 
1384
void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
 
1385
{
 
1386
    RegexPattern pattern(ignoreCase, multiline);
 
1387
 
 
1388
    if ((error = compileRegex(patternString, pattern)))
 
1389
        return;
 
1390
 
 
1391
    numSubpatterns = pattern.m_numSubpatterns;
 
1392
 
 
1393
    RegexGenerator generator(pattern);
 
1394
    generator.compile(globalData, jitObject);
 
1395
 
 
1396
    if (generator.generationFailed()) {
 
1397
        JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
 
1398
        JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
 
1399
        jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
 
1400
    }
 
1401
}
 
1402
 
 
1403
int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output, int outputArraySize)
 
1404
{
 
1405
    if (JSRegExp* fallback = jitObject.getFallback())
 
1406
        return (jsRegExpExecute(fallback, input, length, start, output, outputArraySize) < 0) ? -1 : output[0];
 
1407
 
 
1408
    return jitObject.execute(input, start, length, output);
 
1409
}
 
1410
 
 
1411
}}
 
1412
 
 
1413
#endif
 
1414
 
 
1415
 
 
1416
 
 
1417
 
 
1418