~evarlast/ubuntu/utopic/mongodb/upstart-workaround-debian-bug-718702

Viewing changes to src/third_party/v8/src/jsregexp.cc

Committer: Package Import Robot
Author(s): James Page, James Page, Robie Basak
Date: 2013-05-29 17:44:42 UTC
mfrom: (44.1.7 sid)
Revision ID: package-import@ubuntu.com-20130529174442-z0a4qmoww4y0t458

Tags: 1:2.4.3-1ubuntu1

[ James Page ]
* Merge from Debian unstable, remaining changes:
  - Enable SSL support:
    + d/control: Add libssl-dev to BD's.
    + d/rules: Enabled --ssl option.
    + d/mongodb.conf: Add example SSL configuration options.
  - d/mongodb-server.mongodb.upstart: Add upstart configuration.
  - d/rules: Don't strip binaries during scons build for Ubuntu.
  - d/control: Add armhf to target archs.
  - d/p/SConscript.client.patch: fixup install of client libraries.
  - d/p/0010-install-libs-to-usr-lib-not-usr-lib64-Closes-588557.patch:
    Install libraries to lib not lib64.
* Dropped changes:
  - d/p/arm-support.patch: Included in Debian.
  - d/p/double-alignment.patch: Included in Debian.
  - d/rules,control: Debian also builds with avaliable system libraries
    now.
* Fix FTBFS due to gcc and boost upgrades in saucy:
  - d/p/0008-ignore-unused-local-typedefs.patch: Add -Wno-unused-typedefs
    to unbreak building with g++-4.8.
  - d/p/0009-boost-1.53.patch: Fixup signed/unsigned casting issue.

[ Robie Basak ]
* d/p/0011-Use-a-signed-char-to-store-BSONType-enumerations.patch: Fixup
  build failure on ARM due to missing signed'ness of char cast.

files added:
.pc/0001-Use-libstemmer-when-using-system-libraries.patch

.pc/0001-Use-libstemmer-when-using-system-libraries.patch/SConstruct

.pc/0002-Treate-kfreebsd-like-linux-in-most-respects.patch

.pc/0002-Treate-kfreebsd-like-linux-in-most-respects.patch/SConstruct

.pc/0003-kfreebsd-has-dev-urandom.patch

.pc/0003-kfreebsd-has-dev-urandom.patch/src

.pc/0003-kfreebsd-has-dev-urandom.patch/src/mongo

.pc/0003-kfreebsd-has-dev-urandom.patch/src/mongo/platform

.pc/0003-kfreebsd-has-dev-urandom.patch/src/mongo/platform/random.cpp

.pc/0004-All-platforms-but-Windows-find-hash-in-std-tr1.patch

.pc/0004-All-platforms-but-Windows-find-hash-in-std-tr1.patch/src

.pc/0004-All-platforms-but-Windows-find-hash-in-std-tr1.patch/src/third_party

.pc/0004-All-platforms-but-Windows-find-hash-in-std-tr1.patch/src/third_party/s2

.pc/0004-All-platforms-but-Windows-find-hash-in-std-tr1.patch/src/third_party/s2/hash.h

.pc/0005-kfreebsd-does-not-have-sys-prctl.h.patch

.pc/0005-kfreebsd-does-not-have-sys-prctl.h.patch/src

.pc/0005-kfreebsd-does-not-have-sys-prctl.h.patch/src/third_party

.pc/0005-kfreebsd-does-not-have-sys-prctl.h.patch/src/third_party/v8

.pc/0005-kfreebsd-does-not-have-sys-prctl.h.patch/src/third_party/v8/src

.pc/0005-kfreebsd-does-not-have-sys-prctl.h.patch/src/third_party/v8/src/platform-linux.cc

.pc/0006-ARM-support-for-ASM-operations-in-MongoDB.patch

.pc/0006-ARM-support-for-ASM-operations-in-MongoDB.patch/src

.pc/0006-ARM-support-for-ASM-operations-in-MongoDB.patch/src/mongo

.pc/0006-ARM-support-for-ASM-operations-in-MongoDB.patch/src/mongo/platform

.pc/0006-ARM-support-for-ASM-operations-in-MongoDB.patch/src/mongo/platform/atomic_intrinsics_gcc.h

.pc/0006-ARM-support-for-ASM-operations-in-MongoDB.patch/src/mongo/platform/bits.h

.pc/0007-Fix-ARM-alignment-problems.patch

.pc/0007-Fix-ARM-alignment-problems.patch/src

.pc/0007-Fix-ARM-alignment-problems.patch/src/mongo

.pc/0007-Fix-ARM-alignment-problems.patch/src/mongo/bson

.pc/0007-Fix-ARM-alignment-problems.patch/src/mongo/bson/bsonelement.h

.pc/0007-Fix-ARM-alignment-problems.patch/src/mongo/db

.pc/0007-Fix-ARM-alignment-problems.patch/src/mongo/db/key.cpp

.pc/0008-ignore-unused-local-typedefs.patch

.pc/0008-ignore-unused-local-typedefs.patch/SConstruct

.pc/0009-boost-1.53.patch

.pc/0009-boost-1.53.patch/src

.pc/0009-boost-1.53.patch/src/mongo

.pc/0009-boost-1.53.patch/src/mongo/db

.pc/0009-boost-1.53.patch/src/mongo/db/auth

.pc/0009-boost-1.53.patch/src/mongo/db/auth/privilege_set.cpp

.pc/0010-install-libs-to-usr-lib-not-usr-lib64-Closes-588557.patch

.pc/0010-install-libs-to-usr-lib-not-usr-lib64-Closes-588557.patch/SConstruct

.pc/0011-Use-a-signed-char-to-store-BSONType-enumerations.patch

.pc/0011-Use-a-signed-char-to-store-BSONType-enumerations.patch/src

.pc/0011-Use-a-signed-char-to-store-BSONType-enumerations.patch/src/mongo

.pc/0011-Use-a-signed-char-to-store-BSONType-enumerations.patch/src/mongo/bson

.pc/0011-Use-a-signed-char-to-store-BSONType-enumerations.patch/src/mongo/bson/bson_validate.cpp

CONTRIBUTING.rst

buildscripts/aggregate_tracefiles.py

buildscripts/cpplint.py

buildscripts/fipsld

buildscripts/lint.py

buildscripts/setup_multiversion_mongodb.py

debian/manpages/bsondump.1

debian/manpages/mongooplog.1

debian/manpages/mongoperf.1

debian/manpages/mongotop.1

debian/mongodb-clients.lintian-overrides

debian/mongodb-server.README.Debian

debian/mongodb-server.lintian-overrides

debian/patches/0001-Use-libstemmer-when-using-system-libraries.patch

debian/patches/0002-Treate-kfreebsd-like-linux-in-most-respects.patch

debian/patches/0003-kfreebsd-has-dev-urandom.patch

debian/patches/0004-All-platforms-but-Windows-find-hash-in-std-tr1.patch

debian/patches/0005-kfreebsd-does-not-have-sys-prctl.h.patch

debian/patches/0006-ARM-support-for-ASM-operations-in-MongoDB.patch

debian/patches/0007-Fix-ARM-alignment-problems.patch

debian/patches/0008-ignore-unused-local-typedefs.patch

debian/patches/0009-boost-1.53.patch

debian/patches/0010-install-libs-to-usr-lib-not-usr-lib64-Closes-588557.patch

debian/patches/0011-Use-a-signed-char-to-store-BSONType-enumerations.patch

jstests/aggregation/bugs/server4656.js

jstests/aggregation/bugs/server6125.js

jstests/aggregation/bugs/server6194.js

jstests/aggregation/bugs/server6195.js

jstests/aggregation/bugs/server6239.js

jstests/aggregation/bugs/server6531.js

jstests/aggregation/bugs/server6861.js

jstests/aggregation/bugs/server7768.js

jstests/aggregation/bugs/server7781.js

jstests/aggregation/bugs/server7900.js

jstests/aggregation/bugs/strcasecmp.js

jstests/aggregation/bugs/substr.js

jstests/aggregation/bugs/upperlower.js

jstests/aggregation/testSlave.js

jstests/apply_ops2.js

jstests/auth/adduser_helper.js

jstests/auth/auth_helpers.js

jstests/auth/db_multiple_login.js

jstests/auth/disable_compatibility_privileges.js

jstests/auth/disable_localhost_bypass.js

jstests/auth/implicit_privileges.js

jstests/auth/indexSystemUsers.js

jstests/auth/invalidRole.js

jstests/auth/log_user_basic.js

jstests/auth/log_userid_off.js

jstests/auth/profile.js

jstests/auth/readIndex.js

jstests/auth/readProfile.js

jstests/auth/renameSystemCollections.js

jstests/bad_index_plugin.js

jstests/capped_max.js

jstests/capped_server7543.js

jstests/collmod.js

jstests/connection_status.js

jstests/connections_opened.js

jstests/countc.js

jstests/covered_index_compound_1.js

jstests/covered_index_geo_1.js

jstests/covered_index_geo_2.js

jstests/covered_index_negative_1.js

jstests/covered_index_simple_1.js

jstests/covered_index_simple_2.js

jstests/covered_index_simple_3.js

jstests/covered_index_simple_id.js

jstests/covered_index_sort_1.js

jstests/covered_index_sort_2.js

jstests/covered_index_sort_3.js

jstests/dur/dur_passthrough.js

jstests/explainc.js

jstests/fail_point

jstests/fail_point/fail_point.js

jstests/fts1.js

jstests/fts2.js

jstests/fts3.js

jstests/fts4.js

jstests/fts5.js

jstests/fts_blog.js

jstests/fts_blogwild.js

jstests/fts_mix.js

jstests/fts_partition1.js

jstests/fts_partition_no_multikey.js

jstests/fts_phrase.js

jstests/fts_proj.js

jstests/fts_spanish.js

jstests/geo_allowedcomparisons.js

jstests/geo_exactfetch.js

jstests/geo_haystack3.js

jstests/geo_nearwithin.js

jstests/geo_s2cursorlimitskip.js

jstests/geo_s2dedupnear.js

jstests/geo_s2edgecases.js

jstests/geo_s2exact.js

jstests/geo_s2holesameasshell.js

jstests/geo_s2index.js

jstests/geo_s2indexoldformat.js

jstests/geo_s2intersection.js

jstests/geo_s2largewithin.js

jstests/geo_s2meridian.js

jstests/geo_s2near.js

jstests/geo_s2nearComplex.js

jstests/geo_s2nearwithin.js

jstests/geo_s2nongeoarray.js

jstests/geo_s2nonstring.js

jstests/geo_s2nopoints.js

jstests/geo_s2oddshapes.js

jstests/geo_s2ordering.js

jstests/geo_s2overlappingpolys.js

jstests/geo_s2polywithholes.js

jstests/geo_s2selfintersectingpoly.js

jstests/geo_s2twofields.js

jstests/geo_s2within.js

jstests/geonear_validate.js

jstests/index13.js

jstests/indexOtherNamespace.js

jstests/indexStatsCommand.js

jstests/ismaster.js

jstests/libs/ca.pem

jstests/libs/client.pem

jstests/libs/crl.pem

jstests/libs/crl_client_revoked.pem

jstests/libs/crl_expired.pem

jstests/libs/fts.js

jstests/libs/password_protected.pem

jstests/libs/server.pem

jstests/libs/smoke.pem

jstests/max_message_size.js

jstests/mr_drop.js

jstests/multiVersion/auth_index_upgrade_downgrade.js

jstests/multiVersion/balancer_multiVersion_detect.js

jstests/multiVersion/config_version_excludes.js

jstests/multiVersion/libs/verify_versions.js

jstests/multiVersion/upgrade_cluster_v3_to_v4.js

jstests/multiVersion/upgrade_cluster_v3_to_v4_db.js

jstests/multiVersion/upgrade_cluster_v3_to_v4_wait_for_mongos.js

jstests/multiVersion/upgrade_cluster_v3_to_v4_with_parallel_ops.js

jstests/multiVersion/upgrade_cluster_v3_to_v4_with_stale_mongod.js

jstests/opcounters.js

jstests/push_sort.js

jstests/queryoptimizerc.js

jstests/regex_limit.js

jstests/rename6.js

jstests/replsets/server8070.js

jstests/replsets/server_status_metrics.js

jstests/reversecursor.js

jstests/server9385.js

jstests/sharding/bulk_insert.js

jstests/sharding/config_oplog.js

jstests/sharding/cursor_cleanup.js

jstests/sharding/empty_cluster_init.js

jstests/sharding/explain1.js

jstests/sharding/geo_shardedgeonear.js

jstests/sharding/hash_basic.js

jstests/sharding/hash_presplit.js

jstests/sharding/hash_shard1.js

jstests/sharding/ismaster.js

jstests/sharding/missing_key.js

jstests/sharding/movechunk_include.js

jstests/sharding/movechunk_with_def_paranoia.js

jstests/sharding/movechunk_with_moveParanoia.js

jstests/sharding/movechunk_with_noMoveParanoia.js

jstests/sharding/mrShardedOutputAuth.js

jstests/sharding/mr_noscripting.js

jstests/sharding/read_pref_cmd.js

jstests/sharding/tag_auto_split.js

jstests/sharding/test_stacked_migration_cleanup.js

jstests/slowNightly/index_killop.js

jstests/slowNightly/index_multi.js

jstests/slowNightly/index_retry.js

jstests/slowNightly/logpath.js

jstests/slowNightly/memory.js

jstests/slowNightly/mr_noscripting.js

jstests/slowNightly/replsets_killop.js

jstests/slowNightly/stale_clustered.js

jstests/slowNightly/testing_only_commands.js

jstests/slowNightly/ttl_repl_secondary_disabled.js

jstests/slowWeekly/minvalid.js

jstests/slowWeekly/server6733.js

jstests/ssl

jstests/ssl/replset1.js

jstests/ssl/ssl1.js

jstests/ssl/ssl_cert_password.js

jstests/ssl/ssl_crl.js

jstests/ssl/ssl_crl_revoked.js

jstests/ssl/ssl_weak.js

jstests/storageDetailsCommand.js

jstests/update_setOnInsert.js

jstests/updatel.js

jstests/validate_user_documents.js

src/mongo/base

src/mongo/base/SConscript

src/mongo/base/configuration_variable_manager.cpp

src/mongo/base/configuration_variable_manager.h

src/mongo/base/configuration_variable_manager_test.cpp

src/mongo/base/counter.h

src/mongo/base/counter_test.cpp

src/mongo/base/disallow_copying.h

src/mongo/base/error_codes.err

src/mongo/base/generate_error_codes.py

src/mongo/base/global_initializer.cpp

src/mongo/base/global_initializer.h

src/mongo/base/global_initializer_registerer.cpp

src/mongo/base/global_initializer_registerer.h

src/mongo/base/init.cpp

src/mongo/base/init.h

src/mongo/base/initializer.cpp

src/mongo/base/initializer.h

src/mongo/base/initializer_context.cpp

src/mongo/base/initializer_context.h

src/mongo/base/initializer_dependency_graph.cpp

src/mongo/base/initializer_dependency_graph.h

src/mongo/base/initializer_dependency_graph_test.cpp

src/mongo/base/initializer_function.h

src/mongo/base/initializer_test.cpp

src/mongo/base/make_string_vector.cpp

src/mongo/base/make_string_vector.h

src/mongo/base/owned_pointer_map.h

src/mongo/base/owned_pointer_map_test.cpp

src/mongo/base/owned_pointer_vector.h

src/mongo/base/owned_pointer_vector_test.cpp

src/mongo/base/parse_number.cpp

src/mongo/base/parse_number.h

src/mongo/base/parse_number_test.cpp

src/mongo/base/status.cpp

src/mongo/base/status.h

src/mongo/base/status_test.cpp

src/mongo/base/string_data-inl.h

src/mongo/base/string_data.cpp

src/mongo/base/string_data.h

src/mongo/base/string_data_test.cpp

src/mongo/bson/bson_field.h

src/mongo/bson/bson_field_test.cpp

src/mongo/bson/bson_validate.cpp

src/mongo/bson/bson_validate.h

src/mongo/bson/bson_validate_test.cpp

src/mongo/bson/mutable

src/mongo/bson/mutable/mutable_bson.cpp

src/mongo/bson/mutable/mutable_bson.h

src/mongo/bson/mutable/mutable_bson_algo.h

src/mongo/bson/mutable/mutable_bson_algo_test.cpp

src/mongo/bson/mutable/mutable_bson_builder.cpp

src/mongo/bson/mutable/mutable_bson_builder.h

src/mongo/bson/mutable/mutable_bson_builder_test.cpp

src/mongo/bson/mutable/mutable_bson_heap.cpp

src/mongo/bson/mutable/mutable_bson_heap.h

src/mongo/bson/mutable/mutable_bson_heap_test.cpp

src/mongo/bson/mutable/mutable_bson_internal.cpp

src/mongo/bson/mutable/mutable_bson_internal.h

src/mongo/bson/mutable/mutable_bson_test.cpp

src/mongo/bson/util/bson_extract.cpp

src/mongo/bson/util/bson_extract.h

src/mongo/bson/util/bson_extract_test.cpp

src/mongo/bson/util/builder_test.cpp

src/mongo/client/dbclient_rs_test.cpp

src/mongo/client/sasl_client_authenticate.cpp

src/mongo/client/sasl_client_authenticate.h

src/mongo/client/sasl_client_authenticate_impl.cpp

src/mongo/client/scoped_db_conn_test.cpp

src/mongo/db/SConscript

src/mongo/db/auth

src/mongo/db/auth/SConscript

src/mongo/db/auth/action_set.cpp

src/mongo/db/auth/action_set.h

src/mongo/db/auth/action_set_test.cpp

src/mongo/db/auth/action_types.txt

src/mongo/db/auth/auth_external_state.cpp

src/mongo/db/auth/auth_external_state.h

src/mongo/db/auth/auth_external_state_d.cpp

src/mongo/db/auth/auth_external_state_d.h

src/mongo/db/auth/auth_external_state_mock.h

src/mongo/db/auth/auth_external_state_s.cpp

src/mongo/db/auth/auth_external_state_s.h

src/mongo/db/auth/auth_external_state_server_common.cpp

src/mongo/db/auth/auth_external_state_server_common.h

src/mongo/db/auth/auth_index_d.cpp

src/mongo/db/auth/auth_index_d.h

src/mongo/db/auth/auth_server_parameters.cpp

src/mongo/db/auth/authentication_session.h

src/mongo/db/auth/authorization_manager.cpp

src/mongo/db/auth/authorization_manager.h

src/mongo/db/auth/authorization_manager_test.cpp

src/mongo/db/auth/generate_action_types.py

src/mongo/db/auth/mongo_authentication_session.cpp

src/mongo/db/auth/mongo_authentication_session.h

src/mongo/db/auth/principal.cpp

src/mongo/db/auth/principal.h

src/mongo/db/auth/principal_name.h

src/mongo/db/auth/principal_set.cpp

src/mongo/db/auth/principal_set.h

src/mongo/db/auth/principal_set_test.cpp

src/mongo/db/auth/privilege.cpp

src/mongo/db/auth/privilege.h

src/mongo/db/auth/privilege_set.cpp

src/mongo/db/auth/privilege_set.h

src/mongo/db/auth/privilege_set_test.cpp

src/mongo/db/auth/security_key.cpp

src/mongo/db/auth/security_key.h

src/mongo/db/btree_stats.cpp

src/mongo/db/btree_stats.h

src/mongo/db/btreecursor.h

src/mongo/db/btreeposition.cpp

src/mongo/db/btreeposition.h

src/mongo/db/cc_by_loc.h

src/mongo/db/client_basic.cpp

src/mongo/db/client_basic.h

src/mongo/db/cmdline_test.cpp

src/mongo/db/commands/authentication_commands.cpp

src/mongo/db/commands/authentication_commands.h

src/mongo/db/commands/connection_status.cpp

src/mongo/db/commands/fail_point_cmd.cpp

src/mongo/db/commands/find_and_modify.h

src/mongo/db/commands/find_and_modify_common.cpp

src/mongo/db/commands/index_stats.cpp

src/mongo/db/commands/mr_common.cpp

src/mongo/db/commands/parameters.cpp

src/mongo/db/commands/rename_collection.h

src/mongo/db/commands/rename_collection_common.cpp

src/mongo/db/commands/server_status.cpp

src/mongo/db/commands/server_status.h

src/mongo/db/commands/storage_details.cpp

src/mongo/db/diskloc_test.cpp

src/mongo/db/field_ref.cpp

src/mongo/db/field_ref.h

src/mongo/db/field_ref_test.cpp

src/mongo/db/fts

src/mongo/db/fts/SConscript

src/mongo/db/fts/fts_command.cpp

src/mongo/db/fts/fts_command.h

src/mongo/db/fts/fts_command_mongod.cpp

src/mongo/db/fts/fts_command_mongos.cpp

src/mongo/db/fts/fts_enabled.cpp

src/mongo/db/fts/fts_enabled.h

src/mongo/db/fts/fts_index.cpp

src/mongo/db/fts/fts_index.h

src/mongo/db/fts/fts_index_format.cpp

src/mongo/db/fts/fts_index_format.h

src/mongo/db/fts/fts_index_format_test.cpp

src/mongo/db/fts/fts_matcher.cpp

src/mongo/db/fts/fts_matcher.h

src/mongo/db/fts/fts_matcher_test.cpp

src/mongo/db/fts/fts_query.cpp

src/mongo/db/fts/fts_query.h

src/mongo/db/fts/fts_query_test.cpp

src/mongo/db/fts/fts_search.cpp

src/mongo/db/fts/fts_search.h

src/mongo/db/fts/fts_spec.cpp

src/mongo/db/fts/fts_spec.h

src/mongo/db/fts/fts_spec_test.cpp

src/mongo/db/fts/fts_util.cpp

src/mongo/db/fts/fts_util.h

src/mongo/db/fts/fts_util_test.cpp

src/mongo/db/fts/generate_stop_words.py

src/mongo/db/fts/stemmer.cpp

src/mongo/db/fts/stemmer.h

src/mongo/db/fts/stemmer_test.cpp

src/mongo/db/fts/stop_words.cpp

src/mongo/db/fts/stop_words.h

src/mongo/db/fts/stop_words_danish.txt

src/mongo/db/fts/stop_words_dutch.txt

src/mongo/db/fts/stop_words_english.txt

src/mongo/db/fts/stop_words_finnish.txt

src/mongo/db/fts/stop_words_french.txt

src/mongo/db/fts/stop_words_german.txt

src/mongo/db/fts/stop_words_hungarian.txt

src/mongo/db/fts/stop_words_italian.txt

src/mongo/db/fts/stop_words_norwegian.txt

src/mongo/db/fts/stop_words_portuguese.txt

src/mongo/db/fts/stop_words_romanian.txt

src/mongo/db/fts/stop_words_russian.txt

src/mongo/db/fts/stop_words_spanish.txt

src/mongo/db/fts/stop_words_swedish.txt

src/mongo/db/fts/stop_words_test.cpp

src/mongo/db/fts/stop_words_turkish.txt

src/mongo/db/fts/tokenizer.cpp

src/mongo/db/fts/tokenizer.h

src/mongo/db/fts/tokenizer_test.cpp

src/mongo/db/geo/2d.h

src/mongo/db/geo/geonear.cpp

src/mongo/db/geo/geonear.h

src/mongo/db/geo/geoparser.cpp

src/mongo/db/geo/geoparser.h

src/mongo/db/geo/geoparser_test.cpp

src/mongo/db/geo/geoquery.cpp

src/mongo/db/geo/geoquery.h

src/mongo/db/geo/hash.cpp

src/mongo/db/geo/hash.h

src/mongo/db/geo/hash_test.cpp

src/mongo/db/geo/s2common.cpp

src/mongo/db/geo/s2common.h

src/mongo/db/geo/s2cursor.cpp

src/mongo/db/geo/s2cursor.h

src/mongo/db/geo/s2index.cpp

src/mongo/db/geo/s2index.h

src/mongo/db/geo/s2nearcursor.cpp

src/mongo/db/geo/s2nearcursor.h

src/mongo/db/geo/shapes.cpp

src/mongo/db/index_rebuilder.cpp

src/mongo/db/index_rebuilder.h

src/mongo/db/index_set.cpp

src/mongo/db/index_set.h

src/mongo/db/index_set_test.cpp

src/mongo/db/initialize_server_global_state.cpp

src/mongo/db/initialize_server_global_state.h

src/mongo/db/intervalbtreecursor.cpp

src/mongo/db/intervalbtreecursor.h

src/mongo/db/keypattern.cpp

src/mongo/db/keypattern.h

src/mongo/db/kill_current_op.cpp

src/mongo/db/kill_current_op.h

src/mongo/db/mongod_sm.sln

src/mongo/db/mongod_sm.vcxproj

src/mongo/db/mongod_sm.vcxproj.filters

src/mongo/db/namespacestring_test.cpp

src/mongo/db/ops/modifier_base.h

src/mongo/db/pdfile_version.h

src/mongo/db/pipeline/document_internal.h

src/mongo/db/pipeline/document_source_geo_near.cpp

src/mongo/db/pipeline/value_internal.h

src/mongo/db/server_extra_log_context.cpp

src/mongo/db/server_parameters.cpp

src/mongo/db/server_parameters.h

src/mongo/db/server_parameters_inline.h

src/mongo/db/server_parameters_test.cpp

src/mongo/db/sort_phase_one.h

src/mongo/db/stats/timer_stats.cpp

src/mongo/db/stats/timer_stats.h

src/mongo/dbtests/btreebuildertests.cpp

src/mongo/dbtests/btreepositiontests.cpp

src/mongo/dbtests/config_server_fixture.cpp

src/mongo/dbtests/config_server_fixture.h

src/mongo/dbtests/config_upgrade_tests.cpp

src/mongo/dbtests/extsorttests.cpp

src/mongo/dbtests/indexupdatetests.cpp

src/mongo/dbtests/intervalbtreecursortests.cpp

src/mongo/dbtests/keypatterntests.cpp

src/mongo/dbtests/mock

src/mongo/dbtests/mock/mock_conn_registry.cpp

src/mongo/dbtests/mock/mock_conn_registry.h

src/mongo/dbtests/mock/mock_dbclient_connection.cpp

src/mongo/dbtests/mock/mock_dbclient_connection.h

src/mongo/dbtests/mock/mock_dbclient_cursor.cpp

src/mongo/dbtests/mock/mock_dbclient_cursor.h

src/mongo/dbtests/mock/mock_remote_db_server.cpp

src/mongo/dbtests/mock/mock_remote_db_server.h

src/mongo/dbtests/mock/mock_replica_set.cpp

src/mongo/dbtests/mock/mock_replica_set.h

src/mongo/dbtests/mock_dbclient_conn_test.cpp

src/mongo/dbtests/mock_replica_set_test.cpp

src/mongo/dbtests/profile_test.cpp

src/mongo/dbtests/queryoptimizertests2.cpp

src/mongo/dbtests/test_sm.sln

src/mongo/dbtests/test_sm.vcxproj

src/mongo/dbtests/test_sm.vcxproj.filters

src/mongo/platform/cstdint.h

src/mongo/platform/random.cpp

src/mongo/platform/random.h

src/mongo/platform/random_test.cpp

src/mongo/platform/strtoll.h

src/mongo/platform/unordered_map.h

src/mongo/platform/unordered_set.h

src/mongo/s/SConscript

src/mongo/s/chunk_diff_test.cpp

src/mongo/s/chunk_version.h

src/mongo/s/chunk_version_test.cpp

src/mongo/s/cluster_client_internal.cpp

src/mongo/s/cluster_client_internal.h

src/mongo/s/collection_manager.cpp

src/mongo/s/collection_manager.h

src/mongo/s/collection_manager_test.cpp

src/mongo/s/config_upgrade.cpp

src/mongo/s/config_upgrade.h

src/mongo/s/config_upgrade_helpers.cpp

src/mongo/s/config_upgrade_helpers.h

src/mongo/s/config_upgrade_v0_to_v4.cpp

src/mongo/s/config_upgrade_v3_to_v4.cpp

src/mongo/s/field_parser-inl.h

src/mongo/s/field_parser.cpp

src/mongo/s/field_parser.h

src/mongo/s/field_parser_test.cpp

src/mongo/s/metadata_loader.cpp

src/mongo/s/metadata_loader.h

src/mongo/s/metadata_loader_test.cpp

src/mongo/s/mongo_version_range.cpp

src/mongo/s/mongo_version_range.h

src/mongo/s/mongo_version_range_test.cpp

src/mongo/s/mongos_sm.sln

src/mongo/s/mongos_sm.vcxproj

src/mongo/s/mongos_sm.vcxproj.filters

src/mongo/s/shard_conn_test.cpp

src/mongo/s/stale_exception.h

src/mongo/s/type_changelog.cpp

src/mongo/s/type_changelog.h

src/mongo/s/type_changelog_test.cpp

src/mongo/s/type_chunk.cpp

src/mongo/s/type_chunk.h

src/mongo/s/type_chunk_test.cpp

src/mongo/s/type_collection.cpp

src/mongo/s/type_collection.h

src/mongo/s/type_collection_test.cpp

src/mongo/s/type_config_version.cpp

src/mongo/s/type_config_version.h

src/mongo/s/type_config_version_test.cpp

src/mongo/s/type_database.cpp

src/mongo/s/type_database.h

src/mongo/s/type_database_test.cpp

src/mongo/s/type_lockpings.cpp

src/mongo/s/type_lockpings.h

src/mongo/s/type_lockpings_test.cpp

src/mongo/s/type_locks.cpp

src/mongo/s/type_locks.h

src/mongo/s/type_locks_test.cpp

src/mongo/s/type_mongos.cpp

src/mongo/s/type_mongos.h

src/mongo/s/type_mongos_test.cpp

src/mongo/s/type_settings.cpp

src/mongo/s/type_settings.h

src/mongo/s/type_settings_test.cpp

src/mongo/s/type_shard.cpp

src/mongo/s/type_shard.h

src/mongo/s/type_shard_test.cpp

src/mongo/s/type_tags.cpp

src/mongo/s/type_tags.h

src/mongo/s/type_tags_test.cpp

src/mongo/s/version_manager.cpp

src/mongo/s/version_manager.h

src/mongo/scripting/engine_spidermonkey_internal.h

src/mongo/scripting/v8_deadline_monitor.h

src/mongo/scripting/v8_deadline_monitor_test.cpp

src/mongo/scripting/v8_profiler.cpp

src/mongo/scripting/v8_profiler.h

src/mongo/shell/assert.js

src/mongo/shell/mongo_sm.sln

src/mongo/shell/mongo_sm.vcxproj

src/mongo/shell/mongo_sm.vcxproj.filters

src/mongo/shell/types.js

src/mongo/util/descriptive_stats-inl.h

src/mongo/util/descriptive_stats.h

src/mongo/util/descriptive_stats_test.cpp

src/mongo/util/elapsed_tracker.cpp

src/mongo/util/elapsed_tracker.h

src/mongo/util/exception_filter_win32.cpp

src/mongo/util/exception_filter_win32.h

src/mongo/util/fail_point.cpp

src/mongo/util/fail_point.h

src/mongo/util/fail_point_registry.cpp

src/mongo/util/fail_point_registry.h

src/mongo/util/fail_point_service.cpp

src/mongo/util/fail_point_service.h

src/mongo/util/fail_point_test.cpp

src/mongo/util/file.cpp

src/mongo/util/gsasl_session.cpp

src/mongo/util/gsasl_session.h

src/mongo/util/md5_test.cpp

src/mongo/util/net/sock_test.cpp

src/mongo/util/net/ssl_manager.cpp

src/mongo/util/net/ssl_manager.h

src/mongo/util/ntservice_test.cpp

src/mongo/util/platform_init.cpp

src/mongo/util/processinfo_test.cpp

src/mongo/util/safe_num.cpp

src/mongo/util/safe_num.h

src/mongo/util/safe_num_test.cpp

src/mongo/util/string_map.h

src/mongo/util/string_map_internal.h

src/mongo/util/string_map_test.cpp

src/mongo/util/stringutils_test.cpp

src/mongo/util/text_test.cpp

src/mongo/util/unordered_fast_key_table.h

src/mongo/util/unordered_fast_key_table_internal.h

src/mongodb.sln

src/mongodb_sm.sln

src/third_party/gperftools-2.0/src/config-10gen-macos.h

src/third_party/libstemmer_c

src/third_party/libstemmer_c/MANIFEST

src/third_party/libstemmer_c/Makefile

src/third_party/libstemmer_c/README

src/third_party/libstemmer_c/SConscript

src/third_party/libstemmer_c/examples

src/third_party/libstemmer_c/examples/stemwords.c

src/third_party/libstemmer_c/include

src/third_party/libstemmer_c/include/libstemmer.h

src/third_party/libstemmer_c/libstemmer

src/third_party/libstemmer_c/libstemmer/libstemmer.c

src/third_party/libstemmer_c/libstemmer/libstemmer_c.in

src/third_party/libstemmer_c/libstemmer/libstemmer_utf8.c

src/third_party/libstemmer_c/libstemmer/modules.h

src/third_party/libstemmer_c/libstemmer/modules.txt

src/third_party/libstemmer_c/libstemmer/modules_utf8.h

src/third_party/libstemmer_c/libstemmer/modules_utf8.txt

src/third_party/libstemmer_c/mkinc.mak

src/third_party/libstemmer_c/mkinc_utf8.mak

src/third_party/libstemmer_c/runtime

src/third_party/libstemmer_c/runtime/api.c

src/third_party/libstemmer_c/runtime/api.h

src/third_party/libstemmer_c/runtime/header.h

src/third_party/libstemmer_c/runtime/utilities.c

src/third_party/libstemmer_c/src_c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_danish.c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_danish.h

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_english.c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_english.h

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_french.c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_french.h

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_german.c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_german.h

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_italian.c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_italian.h

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_porter.c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_porter.h

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h

src/third_party/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c

src/third_party/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h

src/third_party/libstemmer_c/src_c/stem_KOI8_R_russian.c

src/third_party/libstemmer_c/src_c/stem_KOI8_R_russian.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_danish.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_danish.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_dutch.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_dutch.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_english.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_english.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_finnish.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_finnish.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_french.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_french.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_german.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_german.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_hungarian.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_hungarian.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_italian.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_italian.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_norwegian.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_norwegian.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_porter.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_porter.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_portuguese.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_portuguese.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_romanian.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_romanian.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_russian.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_russian.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_spanish.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_spanish.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_swedish.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_swedish.h

src/third_party/libstemmer_c/src_c/stem_UTF_8_turkish.c

src/third_party/libstemmer_c/src_c/stem_UTF_8_turkish.h

src/third_party/run_if_newer.js

src/third_party/s2

src/third_party/s2/LICENSE-2.0.txt

src/third_party/s2/Makefile

src/third_party/s2/SConscript

src/third_party/s2/base

src/third_party/s2/base/SConscript

src/third_party/s2/base/basictypes.h

src/third_party/s2/base/casts.h

src/third_party/s2/base/commandlineflags.h

src/third_party/s2/base/definer.h

src/third_party/s2/base/docid.h

src/third_party/s2/base/int128.cc

src/third_party/s2/base/int128.h

src/third_party/s2/base/integral_types.h

src/third_party/s2/base/logging.cc

src/third_party/s2/base/logging.h

src/third_party/s2/base/macros.h

src/third_party/s2/base/port.h

src/third_party/s2/base/scoped_ptr.h

src/third_party/s2/base/stl_decl.h

src/third_party/s2/base/stl_decl_msvc.h

src/third_party/s2/base/stl_decl_osx.h

src/third_party/s2/base/stringprintf.cc

src/third_party/s2/base/stringprintf.h

src/third_party/s2/base/strtoint.cc

src/third_party/s2/base/strtoint.h

src/third_party/s2/base/template_util.h

src/third_party/s2/base/type_traits.h

src/third_party/s2/hash.h

src/third_party/s2/pywraps2_test.py

src/third_party/s2/r1interval.h

src/third_party/s2/r1interval_test.cc

src/third_party/s2/s1angle.cc

src/third_party/s2/s1angle.h

src/third_party/s2/s1angle_test.cc

src/third_party/s2/s1interval.cc

src/third_party/s2/s1interval.h

src/third_party/s2/s1interval_test.cc

src/third_party/s2/s2.cc

src/third_party/s2/s2.h

src/third_party/s2/s2.swig

src/third_party/s2/s2_test.cc

src/third_party/s2/s2cap.cc

src/third_party/s2/s2cap.h

src/third_party/s2/s2cap_test.cc

src/third_party/s2/s2cell.cc

src/third_party/s2/s2cell.h

src/third_party/s2/s2cell_test.cc

src/third_party/s2/s2cellid.cc

src/third_party/s2/s2cellid.h

src/third_party/s2/s2cellid_test.cc

src/third_party/s2/s2cellunion.cc

src/third_party/s2/s2cellunion.h

src/third_party/s2/s2cellunion_test.cc

src/third_party/s2/s2edgeindex.cc

src/third_party/s2/s2edgeindex.h

src/third_party/s2/s2edgeindex_test.cc

src/third_party/s2/s2edgeutil.cc

src/third_party/s2/s2edgeutil.h

src/third_party/s2/s2edgeutil_test.cc

src/third_party/s2/s2latlng.cc

src/third_party/s2/s2latlng.h

src/third_party/s2/s2latlng_test.cc

src/third_party/s2/s2latlngrect.cc

src/third_party/s2/s2latlngrect.h

src/third_party/s2/s2latlngrect_test.cc

src/third_party/s2/s2loop.cc

src/third_party/s2/s2loop.h

src/third_party/s2/s2loop_test.cc

src/third_party/s2/s2pointregion.cc

src/third_party/s2/s2pointregion.h

src/third_party/s2/s2pointregion_test.cc

src/third_party/s2/s2polygon.cc

src/third_party/s2/s2polygon.h

src/third_party/s2/s2polygon_test.cc

src/third_party/s2/s2polygonbuilder.cc

src/third_party/s2/s2polygonbuilder.h

src/third_party/s2/s2polygonbuilder_test.cc

src/third_party/s2/s2polyline.cc

src/third_party/s2/s2polyline.h

src/third_party/s2/s2polyline_test.cc

src/third_party/s2/s2r2rect.cc

src/third_party/s2/s2r2rect.h

src/third_party/s2/s2r2rect_test.cc

src/third_party/s2/s2region.cc

src/third_party/s2/s2region.h

src/third_party/s2/s2regioncoverer.cc

src/third_party/s2/s2regioncoverer.h

src/third_party/s2/s2regioncoverer_test.cc

src/third_party/s2/s2regionintersection.cc

src/third_party/s2/s2regionintersection.h

src/third_party/s2/s2regionunion.cc

src/third_party/s2/s2regionunion.h

src/third_party/s2/s2regionunion_test.cc

src/third_party/s2/s2testing.cc

src/third_party/s2/s2testing.h

src/third_party/s2/strings

src/third_party/s2/strings/SConscript

src/third_party/s2/strings/ascii_ctype.cc

src/third_party/s2/strings/ascii_ctype.h

src/third_party/s2/strings/split.cc

src/third_party/s2/strings/split.h

src/third_party/s2/strings/stringprintf.cc

src/third_party/s2/strings/stringprintf.h

src/third_party/s2/strings/strutil.cc

src/third_party/s2/strings/strutil.h

src/third_party/s2/util

src/third_party/s2/util/coding

src/third_party/s2/util/coding/SConscript

src/third_party/s2/util/coding/coder.cc

src/third_party/s2/util/coding/coder.h

src/third_party/s2/util/coding/varint.cc

src/third_party/s2/util/coding/varint.h

src/third_party/s2/util/endian

src/third_party/s2/util/endian/endian.h

src/third_party/s2/util/hash

src/third_party/s2/util/hash/hash_jenkins_lookup2.h

src/third_party/s2/util/math

src/third_party/s2/util/math/SConscript

src/third_party/s2/util/math/exactfloat

src/third_party/s2/util/math/exactfloat/exactfloat.cc

src/third_party/s2/util/math/exactfloat/exactfloat.h

src/third_party/s2/util/math/mathlimits.cc

src/third_party/s2/util/math/mathlimits.h

src/third_party/s2/util/math/mathutil.cc

src/third_party/s2/util/math/mathutil.h

src/third_party/s2/util/math/matrix3x3-inl.h

src/third_party/s2/util/math/matrix3x3.h

src/third_party/s2/util/math/vector2-inl.h

src/third_party/s2/util/math/vector2.h

src/third_party/s2/util/math/vector3-inl.h

src/third_party/s2/util/math/vector3.h

src/third_party/s2/util/math/vector4-inl.h

src/third_party/s2/util/math/vector4.h

src/third_party/shim_v8.cpp

src/third_party/v8

src/third_party/v8/.gitignore

src/third_party/v8/AUTHORS

src/third_party/v8/ChangeLog

src/third_party/v8/ChangeLog_10gen

src/third_party/v8/LICENSE

src/third_party/v8/LICENSE.strongtalk

src/third_party/v8/LICENSE.v8

src/third_party/v8/LICENSE.valgrind

src/third_party/v8/README

src/third_party/v8/SConscript

src/third_party/v8/include

src/third_party/v8/include/v8-debug.h

src/third_party/v8/include/v8-preparser.h

src/third_party/v8/include/v8-profiler.h

src/third_party/v8/include/v8-testing.h

src/third_party/v8/include/v8.h

src/third_party/v8/include/v8stdint.h

src/third_party/v8/src

src/third_party/v8/src/accessors.cc

src/third_party/v8/src/accessors.h

src/third_party/v8/src/allocation-inl.h

src/third_party/v8/src/allocation.cc

src/third_party/v8/src/allocation.h

src/third_party/v8/src/api.cc

src/third_party/v8/src/api.h

src/third_party/v8/src/apinatives.js

src/third_party/v8/src/apiutils.h

src/third_party/v8/src/arguments.h

src/third_party/v8/src/array.js

src/third_party/v8/src/assembler.cc

src/third_party/v8/src/assembler.h

src/third_party/v8/src/ast.cc

src/third_party/v8/src/ast.h

src/third_party/v8/src/atomicops.h

src/third_party/v8/src/atomicops_internals_arm_gcc.h

src/third_party/v8/src/atomicops_internals_mips_gcc.h

src/third_party/v8/src/atomicops_internals_x86_gcc.cc

src/third_party/v8/src/atomicops_internals_x86_gcc.h

src/third_party/v8/src/atomicops_internals_x86_macosx.h

src/third_party/v8/src/atomicops_internals_x86_msvc.h

src/third_party/v8/src/bignum-dtoa.cc

src/third_party/v8/src/bignum-dtoa.h

src/third_party/v8/src/bignum.cc

src/third_party/v8/src/bignum.h

src/third_party/v8/src/bootstrapper.cc

src/third_party/v8/src/bootstrapper.h

src/third_party/v8/src/builtins.cc

src/third_party/v8/src/builtins.h

src/third_party/v8/src/bytecodes-irregexp.h

src/third_party/v8/src/cached-powers.cc

src/third_party/v8/src/cached-powers.h

src/third_party/v8/src/char-predicates-inl.h

src/third_party/v8/src/char-predicates.h

src/third_party/v8/src/checks.cc

src/third_party/v8/src/checks.h

src/third_party/v8/src/circular-queue-inl.h

src/third_party/v8/src/circular-queue.cc

src/third_party/v8/src/circular-queue.h

src/third_party/v8/src/code-stubs.cc

src/third_party/v8/src/code-stubs.h

src/third_party/v8/src/code.h

src/third_party/v8/src/codegen.cc

src/third_party/v8/src/codegen.h

src/third_party/v8/src/collection.js

src/third_party/v8/src/compilation-cache.cc

src/third_party/v8/src/compilation-cache.h

src/third_party/v8/src/compiler-intrinsics.h

src/third_party/v8/src/compiler.cc

src/third_party/v8/src/compiler.h

src/third_party/v8/src/contexts.cc

src/third_party/v8/src/contexts.h

src/third_party/v8/src/conversions-inl.h

src/third_party/v8/src/conversions.cc

src/third_party/v8/src/conversions.h

src/third_party/v8/src/counters.cc

src/third_party/v8/src/counters.h

src/third_party/v8/src/cpu-profiler-inl.h

src/third_party/v8/src/cpu-profiler.cc

src/third_party/v8/src/cpu-profiler.h

src/third_party/v8/src/cpu.h

src/third_party/v8/src/d8-debug.cc

src/third_party/v8/src/d8-debug.h

src/third_party/v8/src/d8-posix.cc

src/third_party/v8/src/d8-readline.cc

src/third_party/v8/src/d8-windows.cc

src/third_party/v8/src/d8.cc

src/third_party/v8/src/d8.gyp

src/third_party/v8/src/d8.h

src/third_party/v8/src/d8.js

src/third_party/v8/src/data-flow.cc

src/third_party/v8/src/data-flow.h

src/third_party/v8/src/date.cc

src/third_party/v8/src/date.h

src/third_party/v8/src/date.js

src/third_party/v8/src/dateparser-inl.h

src/third_party/v8/src/dateparser.cc

src/third_party/v8/src/dateparser.h

src/third_party/v8/src/debug-agent.cc

src/third_party/v8/src/debug-agent.h

src/third_party/v8/src/debug-debugger.js

src/third_party/v8/src/debug.cc

src/third_party/v8/src/debug.h

src/third_party/v8/src/deoptimizer.cc

src/third_party/v8/src/deoptimizer.h

src/third_party/v8/src/disasm.h

src/third_party/v8/src/disassembler.cc

src/third_party/v8/src/disassembler.h

src/third_party/v8/src/diy-fp.cc

src/third_party/v8/src/diy-fp.h

src/third_party/v8/src/double.h

src/third_party/v8/src/dtoa.cc

src/third_party/v8/src/dtoa.h

src/third_party/v8/src/elements-kind.cc

src/third_party/v8/src/elements-kind.h

src/third_party/v8/src/elements.cc

src/third_party/v8/src/elements.h

src/third_party/v8/src/execution.cc

src/third_party/v8/src/execution.h

src/third_party/v8/src/extensions

src/third_party/v8/src/extensions/externalize-string-extension.cc

src/third_party/v8/src/extensions/externalize-string-extension.h

src/third_party/v8/src/extensions/gc-extension.cc

src/third_party/v8/src/extensions/gc-extension.h

src/third_party/v8/src/extensions/statistics-extension.cc

src/third_party/v8/src/extensions/statistics-extension.h

src/third_party/v8/src/factory.cc

src/third_party/v8/src/factory.h

src/third_party/v8/src/fast-dtoa.cc

src/third_party/v8/src/fast-dtoa.h

src/third_party/v8/src/fixed-dtoa.cc

src/third_party/v8/src/fixed-dtoa.h

src/third_party/v8/src/flag-definitions.h

src/third_party/v8/src/flags.cc

src/third_party/v8/src/flags.h

src/third_party/v8/src/frames-inl.h

src/third_party/v8/src/frames.cc

src/third_party/v8/src/frames.h

src/third_party/v8/src/full-codegen.cc

src/third_party/v8/src/full-codegen.h

src/third_party/v8/src/func-name-inferrer.cc

src/third_party/v8/src/func-name-inferrer.h

src/third_party/v8/src/gdb-jit.cc

src/third_party/v8/src/gdb-jit.h

src/third_party/v8/src/global-handles.cc

src/third_party/v8/src/global-handles.h

src/third_party/v8/src/globals.h

src/third_party/v8/src/handles-inl.h

src/third_party/v8/src/handles.cc

src/third_party/v8/src/handles.h

src/third_party/v8/src/hashmap.h

src/third_party/v8/src/heap-inl.h

src/third_party/v8/src/heap-profiler.cc

src/third_party/v8/src/heap-profiler.h

src/third_party/v8/src/heap.cc

src/third_party/v8/src/heap.h

src/third_party/v8/src/hydrogen-instructions.cc

src/third_party/v8/src/hydrogen-instructions.h

src/third_party/v8/src/hydrogen.cc

src/third_party/v8/src/hydrogen.h

src/third_party/v8/src/ia32

src/third_party/v8/src/ia32/assembler-ia32-inl.h

src/third_party/v8/src/ia32/assembler-ia32.cc

src/third_party/v8/src/ia32/assembler-ia32.h

src/third_party/v8/src/ia32/builtins-ia32.cc

src/third_party/v8/src/ia32/code-stubs-ia32.cc

src/third_party/v8/src/ia32/code-stubs-ia32.h

src/third_party/v8/src/ia32/codegen-ia32.cc

src/third_party/v8/src/ia32/codegen-ia32.h

src/third_party/v8/src/ia32/cpu-ia32.cc

src/third_party/v8/src/ia32/debug-ia32.cc

src/third_party/v8/src/ia32/deoptimizer-ia32.cc

src/third_party/v8/src/ia32/disasm-ia32.cc

src/third_party/v8/src/ia32/frames-ia32.cc

src/third_party/v8/src/ia32/frames-ia32.h

src/third_party/v8/src/ia32/full-codegen-ia32.cc

src/third_party/v8/src/ia32/ic-ia32.cc

src/third_party/v8/src/ia32/lithium-codegen-ia32.cc

src/third_party/v8/src/ia32/lithium-codegen-ia32.h

src/third_party/v8/src/ia32/lithium-gap-resolver-ia32.cc

src/third_party/v8/src/ia32/lithium-gap-resolver-ia32.h

src/third_party/v8/src/ia32/lithium-ia32.cc

src/third_party/v8/src/ia32/lithium-ia32.h

src/third_party/v8/src/ia32/macro-assembler-ia32.cc

src/third_party/v8/src/ia32/macro-assembler-ia32.h

src/third_party/v8/src/ia32/regexp-macro-assembler-ia32.cc

src/third_party/v8/src/ia32/regexp-macro-assembler-ia32.h

src/third_party/v8/src/ia32/simulator-ia32.cc

src/third_party/v8/src/ia32/simulator-ia32.h

src/third_party/v8/src/ia32/stub-cache-ia32.cc

src/third_party/v8/src/ic-inl.h

src/third_party/v8/src/ic.cc

src/third_party/v8/src/ic.h

src/third_party/v8/src/incremental-marking-inl.h

src/third_party/v8/src/incremental-marking.cc

src/third_party/v8/src/incremental-marking.h

src/third_party/v8/src/inspector.cc

src/third_party/v8/src/inspector.h

src/third_party/v8/src/interface.cc

src/third_party/v8/src/interface.h

src/third_party/v8/src/interpreter-irregexp.cc

src/third_party/v8/src/interpreter-irregexp.h

src/third_party/v8/src/isolate-inl.h

src/third_party/v8/src/isolate.cc

src/third_party/v8/src/isolate.h

src/third_party/v8/src/json-parser.h

src/third_party/v8/src/json.js

src/third_party/v8/src/jsregexp.cc

src/third_party/v8/src/jsregexp.h

src/third_party/v8/src/lazy-instance.h

src/third_party/v8/src/list-inl.h

src/third_party/v8/src/list.h

src/third_party/v8/src/lithium-allocator-inl.h

src/third_party/v8/src/lithium-allocator.cc

src/third_party/v8/src/lithium-allocator.h

src/third_party/v8/src/lithium.cc

src/third_party/v8/src/lithium.h

src/third_party/v8/src/liveedit-debugger.js

src/third_party/v8/src/liveedit.cc

src/third_party/v8/src/liveedit.h

src/third_party/v8/src/liveobjectlist-inl.h

src/third_party/v8/src/liveobjectlist.cc

src/third_party/v8/src/liveobjectlist.h

src/third_party/v8/src/log-inl.h

src/third_party/v8/src/log-utils.cc

src/third_party/v8/src/log-utils.h

src/third_party/v8/src/log.cc

src/third_party/v8/src/log.h

src/third_party/v8/src/macro-assembler.h

src/third_party/v8/src/macros.py

src/third_party/v8/src/mark-compact-inl.h

src/third_party/v8/src/mark-compact.cc

src/third_party/v8/src/mark-compact.h

src/third_party/v8/src/math.js

src/third_party/v8/src/messages.cc

src/third_party/v8/src/messages.h

src/third_party/v8/src/messages.js

src/third_party/v8/src/mirror-debugger.js

src/third_party/v8/src/misc-intrinsics.h

src/third_party/v8/src/mksnapshot.cc

src/third_party/v8/src/natives.h

src/third_party/v8/src/objects-debug.cc

src/third_party/v8/src/objects-inl.h

src/third_party/v8/src/objects-printer.cc

src/third_party/v8/src/objects-visiting-inl.h

src/third_party/v8/src/objects-visiting.cc

src/third_party/v8/src/objects-visiting.h

src/third_party/v8/src/objects.cc

src/third_party/v8/src/objects.h

src/third_party/v8/src/once.cc

src/third_party/v8/src/once.h

src/third_party/v8/src/optimizing-compiler-thread.cc

src/third_party/v8/src/optimizing-compiler-thread.h

src/third_party/v8/src/parser.cc

src/third_party/v8/src/parser.h

src/third_party/v8/src/platform-cygwin.cc

src/third_party/v8/src/platform-freebsd.cc

src/third_party/v8/src/platform-linux.cc

src/third_party/v8/src/platform-macos.cc

src/third_party/v8/src/platform-nullos.cc

src/third_party/v8/src/platform-openbsd.cc

src/third_party/v8/src/platform-posix.cc

src/third_party/v8/src/platform-posix.h

src/third_party/v8/src/platform-solaris.cc

src/third_party/v8/src/platform-tls-mac.h

src/third_party/v8/src/platform-tls-win32.h

src/third_party/v8/src/platform-tls.h

src/third_party/v8/src/platform-win32.cc

src/third_party/v8/src/platform.h

src/third_party/v8/src/preparse-data-format.h

src/third_party/v8/src/preparse-data.cc

src/third_party/v8/src/preparse-data.h

src/third_party/v8/src/preparser-api.cc

src/third_party/v8/src/preparser.cc

src/third_party/v8/src/preparser.h

src/third_party/v8/src/prettyprinter.cc

src/third_party/v8/src/prettyprinter.h

src/third_party/v8/src/profile-generator-inl.h

src/third_party/v8/src/profile-generator.cc

src/third_party/v8/src/profile-generator.h

src/third_party/v8/src/property-details.h

src/third_party/v8/src/property.cc

src/third_party/v8/src/property.h

src/third_party/v8/src/proxy.js

src/third_party/v8/src/regexp-macro-assembler-irregexp-inl.h

src/third_party/v8/src/regexp-macro-assembler-irregexp.cc

src/third_party/v8/src/regexp-macro-assembler-irregexp.h

src/third_party/v8/src/regexp-macro-assembler-tracer.cc

src/third_party/v8/src/regexp-macro-assembler-tracer.h

src/third_party/v8/src/regexp-macro-assembler.cc

src/third_party/v8/src/regexp-macro-assembler.h

src/third_party/v8/src/regexp-stack.cc

src/third_party/v8/src/regexp-stack.h

src/third_party/v8/src/regexp.js

src/third_party/v8/src/rewriter.cc

src/third_party/v8/src/rewriter.h

src/third_party/v8/src/runtime-profiler.cc

src/third_party/v8/src/runtime-profiler.h

src/third_party/v8/src/runtime.cc

src/third_party/v8/src/runtime.h

src/third_party/v8/src/runtime.js

src/third_party/v8/src/safepoint-table.cc

src/third_party/v8/src/safepoint-table.h

src/third_party/v8/src/scanner-character-streams.cc

src/third_party/v8/src/scanner-character-streams.h

src/third_party/v8/src/scanner.cc

src/third_party/v8/src/scanner.h

src/third_party/v8/src/scopeinfo.cc

src/third_party/v8/src/scopeinfo.h

src/third_party/v8/src/scopes.cc

src/third_party/v8/src/scopes.h

src/third_party/v8/src/serialize.cc

src/third_party/v8/src/serialize.h

src/third_party/v8/src/simulator.h

src/third_party/v8/src/small-pointer-list.h

src/third_party/v8/src/smart-pointers.h

src/third_party/v8/src/snapshot-common.cc

src/third_party/v8/src/snapshot-empty.cc

src/third_party/v8/src/snapshot.h

src/third_party/v8/src/spaces-inl.h

src/third_party/v8/src/spaces.cc

src/third_party/v8/src/spaces.h

src/third_party/v8/src/splay-tree-inl.h

src/third_party/v8/src/splay-tree.h

src/third_party/v8/src/store-buffer-inl.h

src/third_party/v8/src/store-buffer.cc

src/third_party/v8/src/store-buffer.h

src/third_party/v8/src/string-search.cc

src/third_party/v8/src/string-search.h

src/third_party/v8/src/string-stream.cc

src/third_party/v8/src/string-stream.h

src/third_party/v8/src/string.js

src/third_party/v8/src/strtod.cc

src/third_party/v8/src/strtod.h

src/third_party/v8/src/stub-cache.cc

src/third_party/v8/src/stub-cache.h

src/third_party/v8/src/third_party

src/third_party/v8/src/third_party/valgrind

src/third_party/v8/src/third_party/valgrind/valgrind.h

src/third_party/v8/src/token.cc

src/third_party/v8/src/token.h

src/third_party/v8/src/transitions-inl.h

src/third_party/v8/src/transitions.cc

src/third_party/v8/src/transitions.h

src/third_party/v8/src/type-info.cc

src/third_party/v8/src/type-info.h

src/third_party/v8/src/unbound-queue-inl.h

src/third_party/v8/src/unbound-queue.h

src/third_party/v8/src/unicode-inl.h

src/third_party/v8/src/unicode.cc

src/third_party/v8/src/unicode.h

src/third_party/v8/src/uri.js

src/third_party/v8/src/utils-inl.h

src/third_party/v8/src/utils.cc

src/third_party/v8/src/utils.h

src/third_party/v8/src/v8-counters.cc

src/third_party/v8/src/v8-counters.h

src/third_party/v8/src/v8.cc

src/third_party/v8/src/v8.h

src/third_party/v8/src/v8checks.h

src/third_party/v8/src/v8conversions.cc

src/third_party/v8/src/v8conversions.h

src/third_party/v8/src/v8dll-main.cc

src/third_party/v8/src/v8globals.h

src/third_party/v8/src/v8memory.h

src/third_party/v8/src/v8natives.js

src/third_party/v8/src/v8preparserdll-main.cc

src/third_party/v8/src/v8threads.cc

src/third_party/v8/src/v8threads.h

src/third_party/v8/src/v8utils.cc

src/third_party/v8/src/v8utils.h

src/third_party/v8/src/variables.cc

src/third_party/v8/src/variables.h

src/third_party/v8/src/version.cc

src/third_party/v8/src/version.h

src/third_party/v8/src/vm-state-inl.h

src/third_party/v8/src/vm-state.h

src/third_party/v8/src/win32-headers.h

src/third_party/v8/src/win32-math.cc

src/third_party/v8/src/win32-math.h

src/third_party/v8/src/x64

src/third_party/v8/src/x64/assembler-x64-inl.h

src/third_party/v8/src/x64/assembler-x64.cc

src/third_party/v8/src/x64/assembler-x64.h

src/third_party/v8/src/x64/builtins-x64.cc

src/third_party/v8/src/x64/code-stubs-x64.cc

src/third_party/v8/src/x64/code-stubs-x64.h

src/third_party/v8/src/x64/codegen-x64.cc

src/third_party/v8/src/x64/codegen-x64.h

src/third_party/v8/src/x64/cpu-x64.cc

src/third_party/v8/src/x64/debug-x64.cc

src/third_party/v8/src/x64/deoptimizer-x64.cc

src/third_party/v8/src/x64/disasm-x64.cc

src/third_party/v8/src/x64/frames-x64.cc

src/third_party/v8/src/x64/frames-x64.h

src/third_party/v8/src/x64/full-codegen-x64.cc

src/third_party/v8/src/x64/ic-x64.cc

src/third_party/v8/src/x64/lithium-codegen-x64.cc

src/third_party/v8/src/x64/lithium-codegen-x64.h

src/third_party/v8/src/x64/lithium-gap-resolver-x64.cc

src/third_party/v8/src/x64/lithium-gap-resolver-x64.h

src/third_party/v8/src/x64/lithium-x64.cc

src/third_party/v8/src/x64/lithium-x64.h

src/third_party/v8/src/x64/macro-assembler-x64.cc

src/third_party/v8/src/x64/macro-assembler-x64.h

src/third_party/v8/src/x64/regexp-macro-assembler-x64.cc

src/third_party/v8/src/x64/regexp-macro-assembler-x64.h

src/third_party/v8/src/x64/simulator-x64.cc

src/third_party/v8/src/x64/simulator-x64.h

src/third_party/v8/src/x64/stub-cache-x64.cc

src/third_party/v8/src/zone-inl.h

src/third_party/v8/src/zone.cc

src/third_party/v8/src/zone.h

src/third_party/v8/tools

src/third_party/v8/tools/android-build.sh

src/third_party/v8/tools/android-run.py

src/third_party/v8/tools/android-sync.sh

src/third_party/v8/tools/bash-completion.sh

src/third_party/v8/tools/check-static-initializers.sh

src/third_party/v8/tools/codemap.js

src/third_party/v8/tools/common-includes.sh

src/third_party/v8/tools/consarray.js

src/third_party/v8/tools/csvparser.js

src/third_party/v8/tools/disasm.py

src/third_party/v8/tools/freebsd-tick-processor

src/third_party/v8/tools/fuzz-harness.sh

src/third_party/v8/tools/gc-nvp-trace-processor.py

src/third_party/v8/tools/gcmole

src/third_party/v8/tools/gcmole/Makefile

src/third_party/v8/tools/gcmole/README

src/third_party/v8/tools/gcmole/gccause.lua

src/third_party/v8/tools/gcmole/gcmole.cc

src/third_party/v8/tools/gcmole/gcmole.lua

src/third_party/v8/tools/gdb-v8-support.py

src/third_party/v8/tools/gen-postmortem-metadata.py

src/third_party/v8/tools/generate-ten-powers.scm

src/third_party/v8/tools/grokdump.py

src/third_party/v8/tools/gyp

src/third_party/v8/tools/gyp/v8.gyp

src/third_party/v8/tools/js2c.py

src/third_party/v8/tools/jsmin.py

src/third_party/v8/tools/linux-tick-processor

src/third_party/v8/tools/ll_prof.py

src/third_party/v8/tools/logreader.js

src/third_party/v8/tools/mac-nm

src/third_party/v8/tools/mac-tick-processor

src/third_party/v8/tools/merge-to-branch.sh

src/third_party/v8/tools/oom_dump

src/third_party/v8/tools/oom_dump/README

src/third_party/v8/tools/oom_dump/SConstruct

src/third_party/v8/tools/oom_dump/oom_dump.cc

src/third_party/v8/tools/presubmit.py

src/third_party/v8/tools/process-heap-prof.py

src/third_party/v8/tools/profile.js

src/third_party/v8/tools/profile_view.js

src/third_party/v8/tools/push-to-trunk.sh

src/third_party/v8/tools/run-valgrind.py

src/third_party/v8/tools/splaytree.js

src/third_party/v8/tools/stats-viewer.py

src/third_party/v8/tools/test-wrapper-gypbuild.py

src/third_party/v8/tools/test.py

src/third_party/v8/tools/tickprocessor-driver.js

src/third_party/v8/tools/tickprocessor.js

src/third_party/v8/tools/utils.py

src/third_party/v8/tools/visual_studio

src/third_party/v8/tools/visual_studio/README.txt

src/third_party/v8/tools/windows-tick-processor.bat

files removed:
.pc/0001-install-libs-to-usr-lib-not-usr-lib64-Closes-588557.patch

.pc/0001-install-libs-to-usr-lib-not-usr-lib64-Closes-588557.patch/SConstruct

.pc/arm-support.patch

.pc/arm-support.patch/src

.pc/arm-support.patch/src/mongo

.pc/arm-support.patch/src/mongo/platform

.pc/arm-support.patch/src/mongo/platform/atomic_intrinsics_gcc.h

.pc/arm-support.patch/src/mongo/platform/bits.h

.pc/double-alignment.patch

.pc/double-alignment.patch/src

.pc/double-alignment.patch/src/mongo

.pc/double-alignment.patch/src/mongo/bson

.pc/double-alignment.patch/src/mongo/bson/bsonelement.h

.pc/double-alignment.patch/src/mongo/db

.pc/double-alignment.patch/src/mongo/db/key.cpp

buildscripts/bb.py

buildscripts/emr/lib

buildscripts/emr/lib/hadoop-core-0.20.205.0.jar

debian/patches/0001-install-libs-to-usr-lib-not-usr-lib64-Closes-588557.patch

debian/patches/arm-support.patch

debian/patches/double-alignment.patch

jstests/ageoutjournalfiles.js

jstests/count8.js

jstests/logpath.js

jstests/memory.js

jstests/mr_noscripting.js

jstests/multiVersion/multi_version_sharding_passthrough.js

jstests/replsets/stale_clustered.js

jstests/sharding/authCommands2.js

jstests/sharding/deletion_range.js

jstests/sharding/passthrough1.js

jstests/sharding/read_pref_rs_client.js

jstests/slowNightly/dur_passthrough.js

jstests/slowNightly/repl_monitor_stress.js

jstests/slowNightly/sharding_cursors1.js

jstests/slowWeekly/dur_passthrough.js

jstests/slowWeekly/indexbg_dur.js

src/mongo/bson/stringdata.h

src/mongo/client/authentication_table.h

src/mongo/client/authentication_table_client.cpp

src/mongo/client/authentication_table_common.cpp

src/mongo/client/authentication_table_server.cpp

src/mongo/db/client_common.h

src/mongo/db/compact.h

src/mongo/db/globals.h

src/mongo/db/helpers

src/mongo/db/helpers/dblogger.h

src/mongo/db/modules

src/mongo/db/modules/mms.cpp

src/mongo/db/nonce.cpp

src/mongo/db/nonce.h

src/mongo/db/security.cpp

src/mongo/db/security.h

src/mongo/db/security_commands.cpp

src/mongo/db/security_common.cpp

src/mongo/db/security_common.h

src/mongo/dbtests/authtests.cpp

src/mongo/s/config_migrate.cpp

src/mongo/s/security.cpp

src/mongo/s/shard_version.cpp

src/mongo/s/shard_version.h

src/mongo/s/stats.cpp

src/mongo/s/stats.h

src/mongo/s/util.h

src/mongo/scripting/v8_wrapper.cpp

src/mongo/scripting/v8_wrapper.h

src/third_party/pcre-8.30/config.status

files modified:
.gitignore

.pc/SConscript.client.patch/src/SConscript.client

.pc/applied-patches

SConscript.buildinfo

SConscript.smoke

SConstruct

buildscripts/cleanbb.py

buildscripts/moduleconfig.py

buildscripts/packager.py

buildscripts/s3del.py

buildscripts/smoke.py

buildscripts/sourcepush.py

buildscripts/utils.py

debian/README.Debian

debian/README.source

debian/changelog

debian/control

debian/copyright

debian/manpages/mongo.1

debian/manpages/mongod.1

debian/manpages/mongodump.1

debian/manpages/mongoexport.1

debian/manpages/mongofiles.1

debian/manpages/mongoimport.1

debian/manpages/mongorestore.1

debian/manpages/mongos.1

debian/manpages/mongosniff.1

debian/manpages/mongostat.1

debian/mongodb-clients.install

debian/mongodb-clients.manpages

debian/patches/SConscript.client.patch

debian/patches/series

debian/rules

debian/watch

distsrc/THIRD-PARTY-NOTICES

distsrc/client/SConstruct

docs/building.md

doxygenConfig

jstests/aggregation/bugs/cond.js

jstests/aggregation/bugs/firstlast.js

jstests/aggregation/bugs/ifnull.js

jstests/aggregation/bugs/match.js

jstests/aggregation/bugs/server3832.js

jstests/aggregation/bugs/server4738.js

jstests/aggregation/bugs/server5012.js

jstests/aggregation/bugs/server5209.js

jstests/aggregation/bugs/server6045.js

jstests/aggregation/bugs/server6121.js

jstests/aggregation/bugs/server6127.js

jstests/aggregation/bugs/server6131.js

jstests/aggregation/bugs/server6143.js

jstests/aggregation/bugs/server6147.js

jstests/aggregation/bugs/server6165.js

jstests/aggregation/bugs/server6177.js

jstests/aggregation/bugs/server6189.js

jstests/aggregation/bugs/server6190.js

jstests/aggregation/bugs/server6198.js

jstests/aggregation/bugs/server6238.js

jstests/aggregation/bugs/server6240.js

jstests/aggregation/bugs/server6290.js

jstests/aggregation/bugs/server6335.js

jstests/aggregation/bugs/server6361.js

jstests/aggregation/bugs/server6529.js

jstests/aggregation/bugs/server6530.js

jstests/aggregation/bugs/server6570.js

jstests/aggregation/disabled/server5782.js

jstests/aggregation/extras/utils.js

jstests/aggregation/testall.js

jstests/aggregation/testshard1.js

jstests/apply_ops1.js

jstests/arrayfind2.js

jstests/auth/auth1.js

jstests/auth/copyauth.js

jstests/auth/localhostAuthBypass.js

jstests/basicb.js

jstests/bench_test1.js

jstests/big_object1.js

jstests/compact.js

jstests/count2.js

jstests/dbadmin.js

jstests/dbcase2.js

jstests/dbref2.js

jstests/disk/directoryperdb.js

jstests/disk/diskfull.js

jstests/disk/newcollection.js

jstests/disk/preallocate.js

jstests/disk/repair5.js

jstests/drop.js

jstests/drop2.js

jstests/drop_index.js

jstests/eval9.js

jstests/evalb.js

jstests/evald.js

jstests/explainb.js

jstests/find1.js

jstests/geo_array0.js

jstests/geo_array1.js

jstests/geo_borders.js

jstests/geo_box1_noindex.js

jstests/geo_center_sphere1.js

jstests/geo_circle1_noindex.js

jstests/geo_circle4.js

jstests/geo_polygon1_noindex.js

jstests/geo_withinquery.js

jstests/gle_shell_server5441.js

jstests/hashindex1.js

jstests/indexy.js

jstests/insert2.js

jstests/json1.js

jstests/minmax.js

jstests/mr_errorhandling.js

jstests/mr_killop.js

jstests/multiVersion/0_test_launching.js

jstests/multiVersion/1_test_launching_replset.js

jstests/multiVersion/2_test_launching_cluster.js

jstests/multiVersion/3_upgrade_replset.js

jstests/multiVersion/4_upgrade_cluster.js

jstests/multiVersion/libs/multi_cluster.js

jstests/nestedobj1.js

jstests/padding.js

jstests/parallel/manyclients.js

jstests/profile1.js

jstests/profile3.js

jstests/profile4.js

jstests/quota/quota1.js

jstests/removeb.js

jstests/repl/repl12.js

jstests/repl/repl14.js

jstests/repl/repl2.js

jstests/repl/repl7.js

jstests/replsets/auth1.js

jstests/replsets/capped_id.js

jstests/replsets/fsync_lock_read_secondaries.js

jstests/replsets/initial_sync3.js

jstests/replsets/localhostAuthBypass.js

jstests/replsets/majority.js

jstests/replsets/no_chaining.js

jstests/replsets/remove1.js

jstests/replsets/replset1.js

jstests/replsets/replset4.js

jstests/replsets/replset5.js

jstests/replsets/replset9.js

jstests/replsets/replsetrestart1.js

jstests/replsets/rslib.js

jstests/replsets/stepdown.js

jstests/replsets/stepdown2.js

jstests/replsets/sync2.js

jstests/replsets/sync_passive2.js

jstests/replsets/tags.js

jstests/sharding/array_shard_key.js

jstests/sharding/auth.js

jstests/sharding/authCommands.js

jstests/sharding/authConnectionHook.js

jstests/sharding/auth_config_down.js

jstests/sharding/auth_slaveok_routing.js

jstests/sharding/auto2.js

jstests/sharding/count1.js

jstests/sharding/features2.js

jstests/sharding/features3.js

jstests/sharding/geo_near_random1.js

jstests/sharding/gle_error_message.js

jstests/sharding/gle_with_conf_servers.js

jstests/sharding/inTiming.js

jstests/sharding/index1.js

jstests/sharding/key_string.js

jstests/sharding/localhostAuthBypass.js

jstests/sharding/major_version_check.js

jstests/sharding/migrateBig.js

jstests/sharding/mongos_validate_backoff.js

jstests/sharding/moveprimary_ignore_sharded.js

jstests/sharding/mrShardedOutput.js

jstests/sharding/prefix_shard_key.js

jstests/sharding/read_pref.js

jstests/sharding/remove2.js

jstests/sharding/shard5.js

jstests/sharding/shard6.js

jstests/sharding/shard_existing.js

jstests/sharding/shard_gle_insert.js

jstests/sharding/shard_insert_getlasterror_w2.js

jstests/sharding/stats.js

jstests/sharding/sync2.js

jstests/sharding/sync3.js

jstests/sharding/sync6.js

jstests/sharding/update1.js

jstests/sharding/user_flags_sharded.js

jstests/sharding/version2.js

jstests/sharding/writeback_bulk_insert.js

jstests/sharding/zbigMapReduce.js

jstests/slowNightly/autosplit_heuristics.js

jstests/slowNightly/balance_repl.js

jstests/slowNightly/command_line_parsing.js

jstests/slowNightly/index_check9.js

jstests/slowNightly/large_chunk.js

jstests/slowNightly/replReads.js

jstests/slowNightly/sharding_migrateBigObject.js

jstests/slowNightly/sharding_migrate_cursor1.js

jstests/slowNightly/sharding_passthrough.js

jstests/slowNightly/sharding_rs1.js

jstests/slowNightly/sync6_slow.js

jstests/slowNightly/temp_namespace.js

jstests/slowNightly/ttl1.js

jstests/slowNightly/ttl_repl.js

jstests/slowNightly/ttl_sharded.js

jstests/slowWeekly/conc_update.js

jstests/slowWeekly/cursor_timeout.js

jstests/slowWeekly/indexbg1.js

jstests/splitvector.js

jstests/stats.js

jstests/tool/csvexport1.js

jstests/tool/dumpauth.js

jstests/tool/dumprestore1.js

jstests/tool/dumprestore5.js

jstests/tool/exportimport1.js

jstests/tool/restorewithauth.js

jstests/tool/tool_replset.js

jstests/ts1.js

jstests/update_arraymatch8.js

jstests/use_power_of_2.js

rpm/mongo.spec *

site_scons/libdeps.py

site_scons/site_tools/jsheader.py

site_scons/site_tools/unittest.py

src/SConscript

src/SConscript.client

src/mongo/SConscript

src/mongo/bson/bson-inl.h

src/mongo/bson/bson.h

src/mongo/bson/bson_builder_base.h

src/mongo/bson/bson_db.h

src/mongo/bson/bsondemo/bsondemo.cpp

src/mongo/bson/bsondemo/bsondemo.vcxproj

src/mongo/bson/bsonelement.h

src/mongo/bson/bsonmisc.h

src/mongo/bson/bsonobj.h

src/mongo/bson/bsonobjbuilder.h

src/mongo/bson/bsonobjiterator.h

src/mongo/bson/bsontypes.h

src/mongo/bson/oid.cpp

src/mongo/bson/oid.h

src/mongo/bson/util/builder.h

src/mongo/bson/util/misc.h

src/mongo/client/clientAndShell.cpp

src/mongo/client/clientOnly-private.h

src/mongo/client/clientOnly.cpp

src/mongo/client/connection_factory.cpp

src/mongo/client/connpool.cpp

src/mongo/client/connpool.h

src/mongo/client/constants.h

src/mongo/client/dbclient.cpp

src/mongo/client/dbclient.h

src/mongo/client/dbclient_rs.cpp

src/mongo/client/dbclient_rs.h

src/mongo/client/dbclientcursor.cpp

src/mongo/client/dbclientcursor.h

src/mongo/client/dbclientinterface.h

src/mongo/client/distlock.cpp

src/mongo/client/distlock.h

src/mongo/client/distlock_test.cpp

src/mongo/client/examples/clientTest.cpp

src/mongo/client/examples/first.cpp

src/mongo/client/examples/insert_demo.cpp

src/mongo/client/examples/mongoperf.cpp

src/mongo/client/examples/mongoperf.vcxproj

src/mongo/client/examples/mongoperf.vcxproj.filters

src/mongo/client/gridfs.cpp

src/mongo/client/mongo_client_lib.cpp

src/mongo/client/parallel.cpp

src/mongo/client/parallel.h

src/mongo/client/redef_macros.h

src/mongo/client/simple_client_demo.cpp

src/mongo/client/syncclusterconnection.cpp

src/mongo/client/syncclusterconnection.h

src/mongo/client/undef_macros.h

src/mongo/db/btree.cpp

src/mongo/db/btree.h

src/mongo/db/btreebuilder.cpp

src/mongo/db/btreebuilder.h

src/mongo/db/btreecursor.cpp

src/mongo/db/cap.cpp

src/mongo/db/client.cpp

src/mongo/db/client.h

src/mongo/db/clientcursor.cpp

src/mongo/db/clientcursor.h

src/mongo/db/cloner.cpp

src/mongo/db/cloner.h

src/mongo/db/cmdline.cpp

src/mongo/db/cmdline.h

src/mongo/db/collection.h

src/mongo/db/commands.cpp *

src/mongo/db/commands.h

src/mongo/db/commands/cpuprofile.cpp

src/mongo/db/commands/distinct.cpp

src/mongo/db/commands/find_and_modify.cpp

src/mongo/db/commands/fsync.cpp

src/mongo/db/commands/fsync.h

src/mongo/db/commands/group.cpp

src/mongo/db/commands/hashcmd.cpp

src/mongo/db/commands/isself.cpp

src/mongo/db/commands/mr.cpp

src/mongo/db/commands/mr.h

src/mongo/db/commands/pipeline_command.cpp *

src/mongo/db/commands/touch.cpp

src/mongo/db/compact.cpp

src/mongo/db/connection_factory.cpp

src/mongo/db/curop-inl.h

src/mongo/db/curop.cpp

src/mongo/db/curop.h

src/mongo/db/cursor.cpp

src/mongo/db/cursor.h

src/mongo/db/d_concurrency.cpp

src/mongo/db/d_concurrency.h

src/mongo/db/d_globals.cpp

src/mongo/db/d_globals.h

src/mongo/db/database.cpp

src/mongo/db/database.h

src/mongo/db/databaseholder.h

src/mongo/db/db.cpp

src/mongo/db/db.rc *

src/mongo/db/db_10.sln *

src/mongo/db/dbcommands.cpp

src/mongo/db/dbcommands_admin.cpp

src/mongo/db/dbcommands_generic.cpp

src/mongo/db/dbeval.cpp

src/mongo/db/dbhelpers.cpp

src/mongo/db/dbhelpers.h

src/mongo/db/dbmessage.cpp

src/mongo/db/dbmessage.h

src/mongo/db/dbwebserver.cpp

src/mongo/db/dbwebserver.h

src/mongo/db/diskloc.h

src/mongo/db/driverHelpers.cpp

src/mongo/db/dur.cpp

src/mongo/db/dur.h

src/mongo/db/dur_commitjob.cpp

src/mongo/db/dur_commitjob.h

src/mongo/db/dur_journal.cpp

src/mongo/db/dur_journalimpl.h

src/mongo/db/dur_preplogbuffer.cpp

src/mongo/db/dur_recover.cpp

src/mongo/db/dur_recover.h

src/mongo/db/dur_stats.h

src/mongo/db/dur_writetodatafiles.cpp

src/mongo/db/durop.cpp

src/mongo/db/durop.h

src/mongo/db/extsort.cpp

src/mongo/db/extsort.h

src/mongo/db/geo/2d.cpp

src/mongo/db/geo/core.h

src/mongo/db/geo/haystack.cpp

src/mongo/db/geo/shapes.h

src/mongo/db/hasher.cpp

src/mongo/db/hasher.h

src/mongo/db/hashindex.cpp

src/mongo/db/hashindex.h

src/mongo/db/index.cpp

src/mongo/db/index.h

src/mongo/db/index_update.cpp

src/mongo/db/index_update.h

src/mongo/db/indexkey.cpp

src/mongo/db/indexkey.h

src/mongo/db/instance.cpp

src/mongo/db/instance.h

src/mongo/db/interrupt_status.h *

src/mongo/db/interrupt_status_mongod.cpp *

src/mongo/db/interrupt_status_mongod.h *

src/mongo/db/introspect.cpp

src/mongo/db/introspect.h

src/mongo/db/jsobj.cpp

src/mongo/db/jsobj.h

src/mongo/db/json.cpp

src/mongo/db/json.h

src/mongo/db/lockstate.cpp *

src/mongo/db/lockstate.h

src/mongo/db/matcher.cpp

src/mongo/db/matcher.h

src/mongo/db/matcher_covered.cpp

src/mongo/db/memconcept.cpp

src/mongo/db/memconcept.h

src/mongo/db/module.cpp

src/mongo/db/module.h

src/mongo/db/mongod.sln

src/mongo/db/mongod.vcxproj *

src/mongo/db/mongod.vcxproj.filters *

src/mongo/db/mongommf.cpp

src/mongo/db/mongommf.h

src/mongo/db/namespace-inl.h

src/mongo/db/namespace.h

src/mongo/db/namespace_details-inl.h

src/mongo/db/namespace_details.cpp

src/mongo/db/namespace_details.h

src/mongo/db/namespacestring.h

src/mongo/db/oplog.cpp

src/mongo/db/oplog.h

src/mongo/db/oplogreader.h

src/mongo/db/ops/count.cpp

src/mongo/db/ops/query.cpp

src/mongo/db/ops/query.h

src/mongo/db/ops/update.cpp

src/mongo/db/ops/update_internal.cpp

src/mongo/db/ops/update_internal.h

src/mongo/db/pagefault.cpp

src/mongo/db/pagefault.h

src/mongo/db/pdfile.cpp

src/mongo/db/pdfile.h

src/mongo/db/pipeline/accumulator.cpp *

src/mongo/db/pipeline/accumulator.h *

src/mongo/db/pipeline/accumulator_add_to_set.cpp *

src/mongo/db/pipeline/accumulator_avg.cpp *

src/mongo/db/pipeline/accumulator_first.cpp *

src/mongo/db/pipeline/accumulator_last.cpp *

src/mongo/db/pipeline/accumulator_min_max.cpp *

src/mongo/db/pipeline/accumulator_push.cpp *

src/mongo/db/pipeline/accumulator_single_value.cpp *

src/mongo/db/pipeline/accumulator_sum.cpp *

src/mongo/db/pipeline/builder.cpp *

src/mongo/db/pipeline/builder.h *

src/mongo/db/pipeline/doc_mem_monitor.cpp *

src/mongo/db/pipeline/doc_mem_monitor.h *

src/mongo/db/pipeline/document.cpp *

src/mongo/db/pipeline/document.h *

src/mongo/db/pipeline/document_source.cpp *

src/mongo/db/pipeline/document_source.h *

src/mongo/db/pipeline/document_source_bson_array.cpp *

src/mongo/db/pipeline/document_source_command_shards.cpp

src/mongo/db/pipeline/document_source_cursor.cpp *

src/mongo/db/pipeline/document_source_filter.cpp *

src/mongo/db/pipeline/document_source_filter_base.cpp *

src/mongo/db/pipeline/document_source_group.cpp *

src/mongo/db/pipeline/document_source_limit.cpp

src/mongo/db/pipeline/document_source_match.cpp

src/mongo/db/pipeline/document_source_out.cpp *

src/mongo/db/pipeline/document_source_project.cpp

src/mongo/db/pipeline/document_source_skip.cpp

src/mongo/db/pipeline/document_source_sort.cpp *

src/mongo/db/pipeline/document_source_unwind.cpp *

src/mongo/db/pipeline/expression.cpp

src/mongo/db/pipeline/expression.h *

src/mongo/db/pipeline/expression_context.cpp *

src/mongo/db/pipeline/expression_context.h *

src/mongo/db/pipeline/field_path.h *

src/mongo/db/pipeline/pipeline.cpp

src/mongo/db/pipeline/pipeline.h *

src/mongo/db/pipeline/pipeline_d.cpp *

src/mongo/db/pipeline/pipeline_d.h *

src/mongo/db/pipeline/value.cpp

src/mongo/db/pipeline/value.h *

src/mongo/db/prefetch.cpp

src/mongo/db/projection.cpp

src/mongo/db/projection.h

src/mongo/db/queryoptimizer.cpp

src/mongo/db/queryoptimizer.h

src/mongo/db/queryoptimizercursor.h

src/mongo/db/queryoptimizercursorimpl.cpp

src/mongo/db/queryoptimizercursorimpl.h

src/mongo/db/queryutil-inl.h

src/mongo/db/queryutil.cpp

src/mongo/db/queryutil.h

src/mongo/db/record.cpp

src/mongo/db/record.h

src/mongo/db/repl.cpp

src/mongo/db/repl.h

src/mongo/db/repl/bgsync.cpp

src/mongo/db/repl/bgsync.h

src/mongo/db/repl/connections.h

src/mongo/db/repl/consensus.cpp

src/mongo/db/repl/health.cpp

src/mongo/db/repl/health.h

src/mongo/db/repl/heartbeat.cpp

src/mongo/db/repl/manager.cpp

src/mongo/db/repl/replset_commands.cpp

src/mongo/db/repl/rs.cpp

src/mongo/db/repl/rs.h

src/mongo/db/repl/rs_config.cpp

src/mongo/db/repl/rs_config.h

src/mongo/db/repl/rs_exception.h

src/mongo/db/repl/rs_initialsync.cpp

src/mongo/db/repl/rs_initiate.cpp

src/mongo/db/repl/rs_member.h

src/mongo/db/repl/rs_rollback.cpp

src/mongo/db/repl/rs_sync.cpp

src/mongo/db/repl/rs_sync.h

src/mongo/db/repl_block.cpp

src/mongo/db/repl_block.h

src/mongo/db/replutil.h

src/mongo/db/resource.h

src/mongo/db/restapi.cpp

src/mongo/db/scanandorder.h

src/mongo/db/stats/counters.cpp

src/mongo/db/stats/counters.h

src/mongo/db/stats/snapshots.cpp

src/mongo/db/stats/top.cpp

src/mongo/db/stats/top.h

src/mongo/db/ttl.cpp

src/mongo/dbtests/accumulatortests.cpp

src/mongo/dbtests/basictests.cpp

src/mongo/dbtests/btreetests.cpp

src/mongo/dbtests/btreetests.inl

src/mongo/dbtests/clienttests.cpp

src/mongo/dbtests/counttests.cpp

src/mongo/dbtests/cursortests.cpp

src/mongo/dbtests/d_chunk_manager_tests.cpp

src/mongo/dbtests/dbtests.cpp

src/mongo/dbtests/documentsourcetests.cpp

src/mongo/dbtests/documenttests.cpp

src/mongo/dbtests/expressiontests.cpp

src/mongo/dbtests/framework.cpp

src/mongo/dbtests/jsobjtests.cpp

src/mongo/dbtests/jsontests.cpp

src/mongo/dbtests/jstests.cpp

src/mongo/dbtests/macrotests.cpp

src/mongo/dbtests/matchertests.cpp

src/mongo/dbtests/namespacetests.cpp

src/mongo/dbtests/pdfiletests.cpp

src/mongo/dbtests/perf/btreeperf.cpp

src/mongo/dbtests/perf/perftest.cpp

src/mongo/dbtests/perftests.cpp

src/mongo/dbtests/queryoptimizercursortests.cpp

src/mongo/dbtests/queryoptimizertests.cpp

src/mongo/dbtests/querytests.cpp

src/mongo/dbtests/queryutiltests.cpp

src/mongo/dbtests/removetests.cpp

src/mongo/dbtests/replica_set_monitor_test.cpp

src/mongo/dbtests/replsettests.cpp

src/mongo/dbtests/repltests.cpp

src/mongo/dbtests/sharding.cpp

src/mongo/dbtests/spin_lock_test.cpp

src/mongo/dbtests/stacktests.cpp

src/mongo/dbtests/test.sln

src/mongo/dbtests/test.vcxproj

src/mongo/dbtests/test.vcxproj.filters

src/mongo/dbtests/updatetests.cpp

src/mongo/pch.h

src/mongo/platform/SConscript

src/mongo/platform/atomic_intrinsics_gcc.h

src/mongo/platform/atomic_intrinsics_win32.h

src/mongo/platform/atomic_word.h

src/mongo/platform/basic.h

src/mongo/platform/bits.h

src/mongo/platform/bits_test.cpp

src/mongo/platform/windows_basic.h

src/mongo/s/balance.cpp

src/mongo/s/balance.h

src/mongo/s/balancer_policy.cpp

src/mongo/s/balancer_policy.h

src/mongo/s/balancer_policy_tests.cpp

src/mongo/s/chunk.cpp

src/mongo/s/chunk.h

src/mongo/s/chunk_diff.h

src/mongo/s/chunk_diff.hpp

src/mongo/s/client_info.cpp

src/mongo/s/client_info.h

src/mongo/s/commands_admin.cpp

src/mongo/s/commands_public.cpp

src/mongo/s/config.cpp

src/mongo/s/config.h

src/mongo/s/cursors.cpp

src/mongo/s/cursors.h

src/mongo/s/d_chunk_manager.cpp

src/mongo/s/d_chunk_manager.h

src/mongo/s/d_logic.cpp

src/mongo/s/d_logic.h

src/mongo/s/d_migrate.cpp

src/mongo/s/d_split.cpp

src/mongo/s/d_state.cpp

src/mongo/s/d_writeback.cpp

src/mongo/s/d_writeback.h

src/mongo/s/default_version.cpp

src/mongo/s/grid.cpp

src/mongo/s/grid.h

src/mongo/s/mongos.sln

src/mongo/s/mongos.vcxproj

src/mongo/s/mongos.vcxproj.filters

src/mongo/s/request.cpp

src/mongo/s/request.h

src/mongo/s/s_only.cpp

src/mongo/s/server.cpp

src/mongo/s/shard.cpp

src/mongo/s/shard.h

src/mongo/s/shard_test.cpp

src/mongo/s/shardconnection.cpp

src/mongo/s/shardkey.cpp

src/mongo/s/shardkey.h

src/mongo/s/strategy.cpp

src/mongo/s/strategy.h

src/mongo/s/strategy_shard.cpp

src/mongo/s/strategy_single.cpp

src/mongo/s/writeback_listener.cpp

src/mongo/s/writeback_listener.h

src/mongo/scripting/bench.cpp

src/mongo/scripting/bench.h

src/mongo/scripting/bson_template_evaluator.cpp

src/mongo/scripting/bson_template_evaluator.h

src/mongo/scripting/bson_template_evaluator_test.cpp

src/mongo/scripting/engine.cpp

src/mongo/scripting/engine.h

src/mongo/scripting/engine_none.cpp

src/mongo/scripting/engine_spidermonkey.cpp

src/mongo/scripting/engine_spidermonkey.h

src/mongo/scripting/engine_v8.cpp

src/mongo/scripting/engine_v8.h

src/mongo/scripting/sm_db.cpp

src/mongo/scripting/utils.cpp

src/mongo/scripting/v8_db.cpp

src/mongo/scripting/v8_db.h

src/mongo/scripting/v8_utils.cpp

src/mongo/scripting/v8_utils.h

src/mongo/server.h

src/mongo/shell/collection.js

src/mongo/shell/createCPPfromJavaScriptFiles.js

src/mongo/shell/db.js

src/mongo/shell/dbshell.cpp

src/mongo/shell/linenoise.cpp

src/mongo/shell/mk_wcwidth.cpp

src/mongo/shell/mk_wcwidth.h

src/mongo/shell/mongo.js

src/mongo/shell/mongo.sln

src/mongo/shell/mongo.vcxproj

src/mongo/shell/mongo.vcxproj.filters

src/mongo/shell/query.js

src/mongo/shell/replsettest.js

src/mongo/shell/servers.js

src/mongo/shell/servers_misc.js

src/mongo/shell/shardingtest.js

src/mongo/shell/shell_utils.cpp

src/mongo/shell/shell_utils_extended.cpp

src/mongo/shell/shell_utils_launcher.cpp

src/mongo/shell/shell_utils_launcher.h

src/mongo/shell/utils.js

src/mongo/shell/utils_sh.js

src/mongo/targetver.h

src/mongo/tools/bridge.cpp

src/mongo/tools/bsondump.cpp

src/mongo/tools/docgeneratormain.cpp

src/mongo/tools/dump.cpp

src/mongo/tools/export.cpp

src/mongo/tools/files.cpp

src/mongo/tools/import.cpp

src/mongo/tools/loadgenerator.cpp

src/mongo/tools/oplog.cpp

src/mongo/tools/restore.cpp

src/mongo/tools/sniffer.cpp

src/mongo/tools/stat.cpp

src/mongo/tools/tool.cpp

src/mongo/tools/tool.h

src/mongo/tools/top.cpp

src/mongo/unittest/SConscript

src/mongo/unittest/unittest.h

src/mongo/unittest/unittest_main.cpp

src/mongo/unittest/unittest_test.cpp

src/mongo/util/admin_access.h

src/mongo/util/alignedbuilder.cpp

src/mongo/util/alignedbuilder.h

src/mongo/util/assert_util.cpp

src/mongo/util/assert_util.h

src/mongo/util/background.cpp

src/mongo/util/background.h

src/mongo/util/base64.cpp

src/mongo/util/base64.h

src/mongo/util/checksum.h

src/mongo/util/compress.cpp

src/mongo/util/compress.h

src/mongo/util/concurrency/list.h

src/mongo/util/concurrency/mapsf.h

src/mongo/util/concurrency/msg.h

src/mongo/util/concurrency/mutexdebugger.cpp

src/mongo/util/concurrency/mutexdebugger.h

src/mongo/util/concurrency/qlock.h

src/mongo/util/concurrency/race.h

src/mongo/util/concurrency/rwlock.h

src/mongo/util/concurrency/rwlockimpl.cpp

src/mongo/util/concurrency/rwlockimpl.h

src/mongo/util/concurrency/simplerwlock.h

src/mongo/util/concurrency/spin_lock.cpp

src/mongo/util/concurrency/task.cpp

src/mongo/util/concurrency/ticketholder.h

src/mongo/util/file.h

src/mongo/util/file_allocator.h

src/mongo/util/goodies.h

src/mongo/util/hashtab.h

src/mongo/util/hex.h

src/mongo/util/intrusive_counter.cpp *

src/mongo/util/intrusive_counter.h *

src/mongo/util/lame_stacktrace_test.cpp

src/mongo/util/log.cpp

src/mongo/util/log.h

src/mongo/util/logfile.cpp

src/mongo/util/logfile.h

src/mongo/util/map_util.h

src/mongo/util/md5.hpp

src/mongo/util/md5main.cpp

src/mongo/util/mmap.cpp

src/mongo/util/mmap.h

src/mongo/util/mmap_win.cpp

src/mongo/util/mongoutils/html.h

src/mongo/util/mongoutils/str.h

src/mongo/util/net/hostandport.h

src/mongo/util/net/httpclient.cpp

src/mongo/util/net/httpclient.h

src/mongo/util/net/listen.cpp

src/mongo/util/net/listen.h

src/mongo/util/net/message.h

src/mongo/util/net/message_port.cpp

src/mongo/util/net/message_port.h

src/mongo/util/net/message_server_port.cpp

src/mongo/util/net/miniwebserver.cpp

src/mongo/util/net/miniwebserver.h

src/mongo/util/net/sock.cpp

src/mongo/util/net/sock.h

src/mongo/util/ntservice.cpp

src/mongo/util/ntservice.h

src/mongo/util/paths.h

src/mongo/util/processinfo.cpp

src/mongo/util/processinfo.h

src/mongo/util/processinfo_darwin.cpp

src/mongo/util/processinfo_freebsd.cpp

src/mongo/util/processinfo_linux2.cpp

src/mongo/util/processinfo_none.cpp

src/mongo/util/processinfo_win32.cpp *

src/mongo/util/progress_meter.cpp

src/mongo/util/progress_meter.h

src/mongo/util/queue.h

src/mongo/util/ramlog.cpp

src/mongo/util/ramlog.h

src/mongo/util/stack_introspect.cpp

src/mongo/util/stack_introspect.h

src/mongo/util/stacktrace.cpp

src/mongo/util/stacktrace.h

src/mongo/util/startup_test.cpp

src/mongo/util/stringutils.cpp

src/mongo/util/stringutils.h

src/mongo/util/systeminfo.h

src/mongo/util/systeminfo_linux2.cpp

src/mongo/util/systeminfo_none.cpp

src/mongo/util/systeminfo_win32.cpp

src/mongo/util/text.cpp

src/mongo/util/text.h

src/mongo/util/time_support.cpp

src/mongo/util/time_support.h

src/mongo/util/timer-win32-inl.h

src/mongo/util/timer.cpp

src/mongo/util/touch_pages.cpp

src/mongo/util/version.cpp

src/mongo/util/version.h

src/third_party/SConscript

src/third_party/boost/boost/functional/hash/detail/float_functions.hpp

src/third_party/gperftools-2.0/SConscript

src/third_party/gperftools-2.0/src/config-10gen-linux.h

src/third_party/gperftools-2.0/src/config.h

src/third_party/js-1.7/SpiderMonkey-prebuild.vcxproj

Show diffs side-by-side

added added

removed removed

src/third_party/v8/src/jsregexp.cc

// Redistribution and use in source and binary forms, with or without

// modification, are permitted provided that the following conditions are

// met:

// * Redistributions of source code must retain the above copyright

// notice, this list of conditions and the following disclaimer.

// * Redistributions in binary form must reproduce the above

// copyright notice, this list of conditions and the following

// disclaimer in the documentation and/or other materials provided

// with the distribution.

// * Neither the name of Google Inc. nor the names of its

// contributors may be used to endorse or promote products derived

// from this software without specific prior written permission.

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "v8.h"

#include "ast.h"

#include "compiler.h"

#include "execution.h"

#include "factory.h"

#include "jsregexp.h"

#include "platform.h"

#include "string-search.h"

#include "runtime.h"

#include "compilation-cache.h"

#include "string-stream.h"

#include "parser.h"

#include "regexp-macro-assembler.h"

#include "regexp-macro-assembler-tracer.h"

#include "regexp-macro-assembler-irregexp.h"

#include "regexp-stack.h"

#ifndef V8_INTERPRETED_REGEXP

#if V8_TARGET_ARCH_IA32

#include "ia32/regexp-macro-assembler-ia32.h"

#elif V8_TARGET_ARCH_X64

#include "x64/regexp-macro-assembler-x64.h"

#elif V8_TARGET_ARCH_ARM

#include "arm/regexp-macro-assembler-arm.h"

#elif V8_TARGET_ARCH_MIPS

#include "mips/regexp-macro-assembler-mips.h"

#else

#error Unsupported target architecture.

#endif

#include "interpreter-irregexp.h"

namespace v8 {

namespace internal {

Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor,

Handle<String> pattern,

Handle<String> flags,

bool* has_pending_exception) {

// Call the construct code with 2 arguments.

Handle<Object> argv[] = { pattern, flags };

return Execution::New(constructor, ARRAY_SIZE(argv), argv,

has_pending_exception);

}

static JSRegExp::Flags RegExpFlagsFromString(Handle<String> str) {

int flags = JSRegExp::NONE;

for (int i = 0; i < str->length(); i++) {

switch (str->Get(i)) {

case 'i':

flags |= JSRegExp::IGNORE_CASE;

break;

case 'g':

flags |= JSRegExp::GLOBAL;

break;

case 'm':

flags |= JSRegExp::MULTILINE;

break;

}

return JSRegExp::Flags(flags);

}

static inline void ThrowRegExpException(Handle<JSRegExp> re,

Handle<String> pattern,

Handle<String> error_text,

const char* message) {

100

Isolate* isolate = re->GetIsolate();

101

Factory* factory = isolate->factory();

102

Handle<FixedArray> elements = factory->NewFixedArray(2);

103

elements->set(0, *pattern);

104

elements->set(1, *error_text);

105

Handle<JSArray> array = factory->NewJSArrayWithElements(elements);

106

Handle<Object> regexp_err = factory->NewSyntaxError(message, array);

107

isolate->Throw(*regexp_err);

108

}

109

110

111

ContainedInLattice AddRange(ContainedInLattice containment,

112

const int* ranges,

113

int ranges_length,

114

Interval new_range) {

115

ASSERT((ranges_length & 1) == 1);

116

ASSERT(ranges[ranges_length - 1] == String::kMaxUtf16CodeUnit + 1);

117

if (containment == kLatticeUnknown) return containment;

118

bool inside = false;

119

int last = 0;

120

for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {

121

// Consider the range from last to ranges[i].

122

// We haven't got to the new range yet.

123

if (ranges[i] <= new_range.from()) continue;

124

// New range is wholly inside last-ranges[i]. Note that new_range.to() is

125

// inclusive, but the values in ranges are not.

126

if (last <= new_range.from() && new_range.to() < ranges[i]) {

127

return Combine(containment, inside ? kLatticeIn : kLatticeOut);

128

}

129

return kLatticeUnknown;

130

}

131

return containment;

132

}

133

134

135

// More makes code generation slower, less makes V8 benchmark score lower.

136

const int kMaxLookaheadForBoyerMoore = 8;

137

// In a 3-character pattern you can maximally step forwards 3 characters

138

// at a time, which is not always enough to pay for the extra logic.

139

const int kPatternTooShortForBoyerMoore = 2;

140

141

142

// Identifies the sort of regexps where the regexp engine is faster

143

// than the code used for atom matches.

144

static bool HasFewDifferentCharacters(Handle<String> pattern) {

145

int length = Min(kMaxLookaheadForBoyerMoore, pattern->length());

146

if (length <= kPatternTooShortForBoyerMoore) return false;

147

const int kMod = 128;

148

bool character_found[kMod];

149

int different = 0;

150

memset(&character_found[0], 0, sizeof(character_found));

151

for (int i = 0; i < length; i++) {

152

int ch = (pattern->Get(i) & (kMod - 1));

153

if (!character_found[ch]) {

154

character_found[ch] = true;

155

different++;

156

// We declare a regexp low-alphabet if it has at least 3 times as many

157

// characters as it has different characters.

158

if (different * 3 > length) return false;

159

}

160

}

161

return true;

162

}

163

164

165

// Generic RegExp methods. Dispatches to implementation specific methods.

166

167

168

Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,

169

Handle<String> pattern,

170

Handle<String> flag_str,

171

Zone* zone) {

172

ZoneScope zone_scope(zone, DELETE_ON_EXIT);

173

Isolate* isolate = re->GetIsolate();

174

JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);

175

CompilationCache* compilation_cache = isolate->compilation_cache();

176

Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags);

177

bool in_cache = !cached.is_null();

178

LOG(isolate, RegExpCompileEvent(re, in_cache));

179

180

Handle<Object> result;

181

if (in_cache) {

182

re->set_data(*cached);

183

return re;

184

}

185

pattern = FlattenGetString(pattern);

186

PostponeInterruptsScope postpone(isolate);

187

RegExpCompileData parse_result;

188

FlatStringReader reader(isolate, pattern);

189

if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),

190

&parse_result, zone)) {

191

// Throw an exception if we fail to parse the pattern.

192

ThrowRegExpException(re,

193

pattern,

194

parse_result.error,

195

"malformed_regexp");

196

return Handle<Object>::null();

197

}

198

199

bool has_been_compiled = false;

200

201

if (parse_result.simple &&

202

!flags.is_ignore_case() &&

203

!HasFewDifferentCharacters(pattern)) {

204

// Parse-tree is a single atom that is equal to the pattern.

205

AtomCompile(re, pattern, flags, pattern);

206

has_been_compiled = true;

207

} else if (parse_result.tree->IsAtom() &&

208

!flags.is_ignore_case() &&

209

parse_result.capture_count == 0) {

210

RegExpAtom* atom = parse_result.tree->AsAtom();

211

Vector<const uc16> atom_pattern = atom->data();

212

Handle<String> atom_string =

213

isolate->factory()->NewStringFromTwoByte(atom_pattern);

214

if (!HasFewDifferentCharacters(atom_string)) {

215

AtomCompile(re, pattern, flags, atom_string);

216

has_been_compiled = true;

217

}

218

}

219

if (!has_been_compiled) {

220

IrregexpInitialize(re, pattern, flags, parse_result.capture_count);

221

}

222

ASSERT(re->data()->IsFixedArray());

223

// Compilation succeeded so the data is set on the regexp

224

// and we can store it in the cache.

225

Handle<FixedArray> data(FixedArray::cast(re->data()));

226

compilation_cache->PutRegExp(pattern, flags, data);

227

228

return re;

229

}

230

231

232

Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,

233

Handle<String> subject,

234

int index,

235

Handle<JSArray> last_match_info) {

236

switch (regexp->TypeTag()) {

237

case JSRegExp::ATOM:

238

return AtomExec(regexp, subject, index, last_match_info);

239

case JSRegExp::IRREGEXP: {

240

Handle<Object> result =

241

IrregexpExec(regexp, subject, index, last_match_info);

242

ASSERT(!result.is_null() ||

243

regexp->GetIsolate()->has_pending_exception());

244

return result;

245

}

246

default:

247

UNREACHABLE();

248

return Handle<Object>::null();

249

}

250

}

251

252

253

// RegExp Atom implementation: Simple string search using indexOf.

254

255

256

void RegExpImpl::AtomCompile(Handle<JSRegExp> re,

257

Handle<String> pattern,

258

JSRegExp::Flags flags,

259

Handle<String> match_pattern) {

260

re->GetIsolate()->factory()->SetRegExpAtomData(re,

261

JSRegExp::ATOM,

262

pattern,

263

flags,

264

match_pattern);

265

}

266

267

268

static void SetAtomLastCapture(FixedArray* array,

269

String* subject,

270

int from,

271

int to) {

272

NoHandleAllocation no_handles;

273

RegExpImpl::SetLastCaptureCount(array, 2);

274

RegExpImpl::SetLastSubject(array, subject);

275

RegExpImpl::SetLastInput(array, subject);

276

RegExpImpl::SetCapture(array, 0, from);

277

RegExpImpl::SetCapture(array, 1, to);

278

}

279

280

281

Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,

282

Handle<String> subject,

283

int index,

284

Handle<JSArray> last_match_info) {

285

Isolate* isolate = re->GetIsolate();

286

287

ASSERT(0 <= index);

288

ASSERT(index <= subject->length());

289

290

if (!subject->IsFlat()) FlattenString(subject);

291

AssertNoAllocation no_heap_allocation; // ensure vectors stay valid

292

293

String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));

294

int needle_len = needle->length();

295

ASSERT(needle->IsFlat());

296

297

if (needle_len != 0) {

298

if (index + needle_len > subject->length()) {

299

return isolate->factory()->null_value();

300

}

301

302

String::FlatContent needle_content = needle->GetFlatContent();

303

String::FlatContent subject_content = subject->GetFlatContent();

304

ASSERT(needle_content.IsFlat());

305

ASSERT(subject_content.IsFlat());

306

// dispatch on type of strings

307

index = (needle_content.IsAscii()

308

? (subject_content.IsAscii()

309

? SearchString(isolate,

310

subject_content.ToAsciiVector(),

311

needle_content.ToAsciiVector(),

312

index)

313

: SearchString(isolate,

314

subject_content.ToUC16Vector(),

315

needle_content.ToAsciiVector(),

316

index))

317

: (subject_content.IsAscii()

318

? SearchString(isolate,

319

subject_content.ToAsciiVector(),

320

needle_content.ToUC16Vector(),

321

index)

322

: SearchString(isolate,

323

subject_content.ToUC16Vector(),

324

needle_content.ToUC16Vector(),

325

index)));

326

if (index == -1) return isolate->factory()->null_value();

327

}

328

ASSERT(last_match_info->HasFastObjectElements());

329

330

{

331

NoHandleAllocation no_handles;

332

FixedArray* array = FixedArray::cast(last_match_info->elements());

333

SetAtomLastCapture(array, *subject, index, index + needle_len);

334

}

335

return last_match_info;

336

}

337

338

339

// Irregexp implementation.

340

341

// Ensures that the regexp object contains a compiled version of the

342

// source for either ASCII or non-ASCII strings.

343

// If the compiled version doesn't already exist, it is compiled

344

// from the source pattern.

345

// If compilation fails, an exception is thrown and this function

346

// returns false.

347

bool RegExpImpl::EnsureCompiledIrregexp(

348

Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii) {

349

Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii));

350

#ifdef V8_INTERPRETED_REGEXP

351

if (compiled_code->IsByteArray()) return true;

352

#else // V8_INTERPRETED_REGEXP (RegExp native code)

353

if (compiled_code->IsCode()) return true;

354

#endif

355

// We could potentially have marked this as flushable, but have kept

356

// a saved version if we did not flush it yet.

357

Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii));

358

if (saved_code->IsCode()) {

359

// Reinstate the code in the original place.

360

re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code);

361

ASSERT(compiled_code->IsSmi());

362

return true;

363

}

364

return CompileIrregexp(re, sample_subject, is_ascii);

365

}

366

367

368

static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,

369

bool is_ascii,

370

Handle<String> error_message,

371

Isolate* isolate) {

372

Factory* factory = isolate->factory();

373

Handle<FixedArray> elements = factory->NewFixedArray(2);

374

elements->set(0, re->Pattern());

375

elements->set(1, *error_message);

376

Handle<JSArray> array = factory->NewJSArrayWithElements(elements);

377

Handle<Object> regexp_err =

378

factory->NewSyntaxError("malformed_regexp", array);

379

isolate->Throw(*regexp_err);

380

return false;

381

}

382

383

384

bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,

385

Handle<String> sample_subject,

386

bool is_ascii) {

387

// Compile the RegExp.

388

Isolate* isolate = re->GetIsolate();

389

ZoneScope zone_scope(isolate->runtime_zone(), DELETE_ON_EXIT);

390

PostponeInterruptsScope postpone(isolate);

391

// If we had a compilation error the last time this is saved at the

392

// saved code index.

393

Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));

394

// When arriving here entry can only be a smi, either representing an

395

// uncompiled regexp, a previous compilation error, or code that has

396

// been flushed.

397

ASSERT(entry->IsSmi());

398

int entry_value = Smi::cast(entry)->value();

399

ASSERT(entry_value == JSRegExp::kUninitializedValue ||

400

entry_value == JSRegExp::kCompilationErrorValue ||

401

(entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));

402

403

if (entry_value == JSRegExp::kCompilationErrorValue) {

404

// A previous compilation failed and threw an error which we store in

405

// the saved code index (we store the error message, not the actual

406

// error). Recreate the error object and throw it.

407

Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));

408

ASSERT(error_string->IsString());

409

Handle<String> error_message(String::cast(error_string));

410

CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);

411

return false;

412

}

413

414

JSRegExp::Flags flags = re->GetFlags();

415

416

Handle<String> pattern(re->Pattern());

417

if (!pattern->IsFlat()) FlattenString(pattern);

418

RegExpCompileData compile_data;

419

FlatStringReader reader(isolate, pattern);

420

Zone* zone = isolate->runtime_zone();

421

if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),

422

&compile_data,

423

zone)) {

424

// Throw an exception if we fail to parse the pattern.

425

// THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.

426

ThrowRegExpException(re,

427

pattern,

428

compile_data.error,

429

"malformed_regexp");

430

return false;

431

}

432

RegExpEngine::CompilationResult result =

433

RegExpEngine::Compile(&compile_data,

434

flags.is_ignore_case(),

435

flags.is_global(),

436

flags.is_multiline(),

437

pattern,

438

sample_subject,

439

is_ascii,

440

zone);

441

if (result.error_message != NULL) {

442

// Unable to compile regexp.

443

Handle<String> error_message =

444

isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message));

445

CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);

446

return false;

447

}

448

449

Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));

450

data->set(JSRegExp::code_index(is_ascii), result.code);

451

int register_max = IrregexpMaxRegisterCount(*data);

452

if (result.num_registers > register_max) {

453

SetIrregexpMaxRegisterCount(*data, result.num_registers);

454

}

455

456

return true;

457

}

458

459

460

int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) {

461

return Smi::cast(

462

re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();

463

}

464

465

466

void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) {

467

re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value));

468

}

469

470

471

int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) {

472

return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();

473

}

474

475

476

int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {

477

return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();

478

}

479

480

481

ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) {

482

return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii)));

483

}

484

485

486

Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {

487

return Code::cast(re->get(JSRegExp::code_index(is_ascii)));

488

}

489

490

491

void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,

492

Handle<String> pattern,

493

JSRegExp::Flags flags,

494

int capture_count) {

495

// Initialize compiled code entries to null.

496

re->GetIsolate()->factory()->SetRegExpIrregexpData(re,

497

JSRegExp::IRREGEXP,

498

pattern,

499

flags,

500

capture_count);

501

}

502

503

504

int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,

505

Handle<String> subject) {

506

if (!subject->IsFlat()) FlattenString(subject);

507

508

// Check the asciiness of the underlying storage.

509

bool is_ascii = subject->IsAsciiRepresentationUnderneath();

510

if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1;

511

512

#ifdef V8_INTERPRETED_REGEXP

513

// Byte-code regexp needs space allocated for all its registers.

514

return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));

515

#else // V8_INTERPRETED_REGEXP

516

// Native regexp only needs room to output captures. Registers are handled

517

// internally.

518

return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;

519

#endif // V8_INTERPRETED_REGEXP

520

}

521

522

523

int RegExpImpl::GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,

524

int registers_per_match,

525

int* max_matches) {

526

#ifdef V8_INTERPRETED_REGEXP

527

// Global loop in interpreted regexp is not implemented. Therefore we choose

528

// the size of the offsets vector so that it can only store one match.

529

*max_matches = 1;

530

return registers_per_match;

531

#else // V8_INTERPRETED_REGEXP

532

int size = Max(registers_per_match, OffsetsVector::kStaticOffsetsVectorSize);

533

*max_matches = size / registers_per_match;

534

return size;

535

#endif // V8_INTERPRETED_REGEXP

536

}

537

538

539

int RegExpImpl::IrregexpExecRaw(

540

Handle<JSRegExp> regexp,

541

Handle<String> subject,

542

int index,

543

Vector<int> output) {

544

Isolate* isolate = regexp->GetIsolate();

545

546

Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);

547

548

ASSERT(index >= 0);

549

ASSERT(index <= subject->length());

550

ASSERT(subject->IsFlat());

551

552

bool is_ascii = subject->IsAsciiRepresentationUnderneath();

553

554

#ifndef V8_INTERPRETED_REGEXP

555

ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);

556

do {

557

EnsureCompiledIrregexp(regexp, subject, is_ascii);

558

Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);

559

NativeRegExpMacroAssembler::Result res =

560

NativeRegExpMacroAssembler::Match(code,

561

subject,

562

output.start(),

563

output.length(),

564

index,

565

isolate);

566

if (res != NativeRegExpMacroAssembler::RETRY) {

567

ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION ||

568

isolate->has_pending_exception());

569

STATIC_ASSERT(

570

static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);

571

STATIC_ASSERT(

572

static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);

573

STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)

574

== RE_EXCEPTION);

575

return static_cast<IrregexpResult>(res);

576

}

577

// If result is RETRY, the string has changed representation, and we

578

// must restart from scratch.

579

// In this case, it means we must make sure we are prepared to handle

580

// the, potentially, different subject (the string can switch between

581

// being internal and external, and even between being ASCII and UC16,

582

// but the characters are always the same).

583

IrregexpPrepare(regexp, subject);

584

is_ascii = subject->IsAsciiRepresentationUnderneath();

585

} while (true);

586

UNREACHABLE();

587

return RE_EXCEPTION;

588

#else // V8_INTERPRETED_REGEXP

589

590

ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));

591

// We must have done EnsureCompiledIrregexp, so we can get the number of

592

// registers.

593

int* register_vector = output.start();

594

int number_of_capture_registers =

595

(IrregexpNumberOfCaptures(*irregexp) + 1) * 2;

596

for (int i = number_of_capture_registers - 1; i >= 0; i--) {

597

register_vector[i] = -1;

598

}

599

Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);

600

601

IrregexpResult result = IrregexpInterpreter::Match(isolate,

602

byte_codes,

603

subject,

604

register_vector,

605

index);

606

if (result == RE_EXCEPTION) {

607

ASSERT(!isolate->has_pending_exception());

608

isolate->StackOverflow();

609

}

610

return result;

611

#endif // V8_INTERPRETED_REGEXP

612

}

613

614

615

Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,

616

Handle<String> subject,

617

int previous_index,

618

Handle<JSArray> last_match_info) {

619

Isolate* isolate = jsregexp->GetIsolate();

620

ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);

621

622

// Prepare space for the return values.

623

#ifdef V8_INTERPRETED_REGEXP

624

#ifdef DEBUG

625

if (FLAG_trace_regexp_bytecodes) {

626

String* pattern = jsregexp->Pattern();

627

PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));

628

PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));

629

}

630

#endif

631

#endif

632

int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject);

633

if (required_registers < 0) {

634

// Compiling failed with an exception.

635

ASSERT(isolate->has_pending_exception());

636

return Handle<Object>::null();

637

}

638

639

OffsetsVector registers(required_registers, isolate);

640

641

int res = RegExpImpl::IrregexpExecRaw(jsregexp, subject, previous_index,

642

Vector<int>(registers.vector(),

643

registers.length()));

644

if (res == RE_SUCCESS) {

645

int capture_register_count =

646

(IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;

647

last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);

648

AssertNoAllocation no_gc;

649

int* register_vector = registers.vector();

650

FixedArray* array = FixedArray::cast(last_match_info->elements());

651

for (int i = 0; i < capture_register_count; i += 2) {

652

SetCapture(array, i, register_vector[i]);

653

SetCapture(array, i + 1, register_vector[i + 1]);

654

}

655

SetLastCaptureCount(array, capture_register_count);

656

SetLastSubject(array, *subject);

657

SetLastInput(array, *subject);

658

return last_match_info;

659

}

660

if (res == RE_EXCEPTION) {

661

ASSERT(isolate->has_pending_exception());

662

return Handle<Object>::null();

663

}

664

ASSERT(res == RE_FAILURE);

665

return isolate->factory()->null_value();

666

}

667

668

669

// -------------------------------------------------------------------

670

// Implementation of the Irregexp regular expression engine.

671

672

// The Irregexp regular expression engine is intended to be a complete

673

// implementation of ECMAScript regular expressions. It generates either

674

// bytecodes or native code.

675

676

// The Irregexp regexp engine is structured in three steps.

677

// 1) The parser generates an abstract syntax tree. See ast.cc.

678

// 2) From the AST a node network is created. The nodes are all

679

// subclasses of RegExpNode. The nodes represent states when

680

// executing a regular expression. Several optimizations are

681

// performed on the node network.

682

// 3) From the nodes we generate either byte codes or native code

683

// that can actually execute the regular expression (perform

684

// the search). The code generation step is described in more

685

// detail below.

686

687

// Code generation.

688

689

// The nodes are divided into four main categories.

690

// * Choice nodes

691

// These represent places where the regular expression can

692

// match in more than one way. For example on entry to an

693

// alternation (foo|bar) or a repetition (*, +, ? or {}).

694

// * Action nodes

695

// These represent places where some action should be

696

// performed. Examples include recording the current position

697

// in the input string to a register (in order to implement

698

// captures) or other actions on register for example in order

699

// to implement the counters needed for {} repetitions.

700

// * Matching nodes

701

// These attempt to match some element part of the input string.

702

// Examples of elements include character classes, plain strings

703

// or back references.

704

// * End nodes

705

// These are used to implement the actions required on finding

706

// a successful match or failing to find a match.

707

708

// The code generated (whether as byte codes or native code) maintains

709

// some state as it runs. This consists of the following elements:

710

711

// * The capture registers. Used for string captures.

712

// * Other registers. Used for counters etc.

713

// * The current position.

714

// * The stack of backtracking information. Used when a matching node

715

// fails to find a match and needs to try an alternative.

716

717

// Conceptual regular expression execution model:

718

719

// There is a simple conceptual model of regular expression execution

720

// which will be presented first. The actual code generated is a more

721

// efficient simulation of the simple conceptual model:

722

723

// * Choice nodes are implemented as follows:

724

// For each choice except the last {

725

// push current position

726

// push backtrack code location

727

// <generate code to test for choice>

728

// backtrack code location:

729

// pop current position

730

// }

731

// <generate code to test for last choice>

732

733

// * Actions nodes are generated as follows

734

// <push affected registers on backtrack stack>

735

// <generate code to perform action>

736

// push backtrack code location

737

// <generate code to test for following nodes>

738

// backtrack code location:

739

// <pop affected registers to restore their state>

740

// <pop backtrack location from stack and go to it>

741

742

// * Matching nodes are generated as follows:

743

// if input string matches at current position

744

// update current position

745

// <generate code to test for following nodes>

746

// else

747

// <pop backtrack location from stack and go to it>

748

749

// Thus it can be seen that the current position is saved and restored

750

// by the choice nodes, whereas the registers are saved and restored by

751

// by the action nodes that manipulate them.

752

753

// The other interesting aspect of this model is that nodes are generated

754

// at the point where they are needed by a recursive call to Emit(). If

755

// the node has already been code generated then the Emit() call will

756

// generate a jump to the previously generated code instead. In order to

757

// limit recursion it is possible for the Emit() function to put the node

758

// on a work list for later generation and instead generate a jump. The

759

// destination of the jump is resolved later when the code is generated.

760

761

// Actual regular expression code generation.

762

763

// Code generation is actually more complicated than the above. In order

764

// to improve the efficiency of the generated code some optimizations are

765

// performed

766

767

// * Choice nodes have 1-character lookahead.

768

// A choice node looks at the following character and eliminates some of

769

// the choices immediately based on that character. This is not yet

770

// implemented.

771

// * Simple greedy loops store reduced backtracking information.

772

// A quantifier like /.*foo/m will greedily match the whole input. It will

773

// then need to backtrack to a point where it can match "foo". The naive

774

// implementation of this would push each character position onto the

775

// backtracking stack, then pop them off one by one. This would use space

776

// proportional to the length of the input string. However since the "."

777

// can only match in one way and always has a constant length (in this case

778

// of 1) it suffices to store the current position on the top of the stack

779

// once. Matching now becomes merely incrementing the current position and

780

// backtracking becomes decrementing the current position and checking the

781

// result against the stored current position. This is faster and saves

782

// space.

783

// * The current state is virtualized.

784

// This is used to defer expensive operations until it is clear that they

785

// are needed and to generate code for a node more than once, allowing

786

// specialized an efficient versions of the code to be created. This is

787

// explained in the section below.

788

789

// Execution state virtualization.

790

791

// Instead of emitting code, nodes that manipulate the state can record their

792

// manipulation in an object called the Trace. The Trace object can record a

793

// current position offset, an optional backtrack code location on the top of

794

// the virtualized backtrack stack and some register changes. When a node is

795

// to be emitted it can flush the Trace or update it. Flushing the Trace

796

// will emit code to bring the actual state into line with the virtual state.

797

// Avoiding flushing the state can postpone some work (e.g. updates of capture

798

// registers). Postponing work can save time when executing the regular

799

// expression since it may be found that the work never has to be done as a

800

// failure to match can occur. In addition it is much faster to jump to a

801

// known backtrack code location than it is to pop an unknown backtrack

802

// location from the stack and jump there.

803

804

// The virtual state found in the Trace affects code generation. For example

805

// the virtual state contains the difference between the actual current

806

// position and the virtual current position, and matching code needs to use

807

// this offset to attempt a match in the correct location of the input

808

// string. Therefore code generated for a non-trivial trace is specialized

809

// to that trace. The code generator therefore has the ability to generate

810

// code for each node several times. In order to limit the size of the

811

// generated code there is an arbitrary limit on how many specialized sets of

812

// code may be generated for a given node. If the limit is reached, the

813

// trace is flushed and a generic version of the code for a node is emitted.

814

// This is subsequently used for that node. The code emitted for non-generic

815

// trace is not recorded in the node and so it cannot currently be reused in

816

// the event that code generation is requested for an identical trace.

817

818

819

void RegExpTree::AppendToText(RegExpText* text, Zone* zone) {

820

UNREACHABLE();

821

}

822

823

824

void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) {

825

text->AddElement(TextElement::Atom(this), zone);

826

}

827

828

829

void RegExpCharacterClass::AppendToText(RegExpText* text, Zone* zone) {

830

text->AddElement(TextElement::CharClass(this), zone);

831

}

832

833

834

void RegExpText::AppendToText(RegExpText* text, Zone* zone) {

835

for (int i = 0; i < elements()->length(); i++)

836

text->AddElement(elements()->at(i), zone);

837

}

838

839

840

TextElement TextElement::Atom(RegExpAtom* atom) {

841

TextElement result = TextElement(ATOM);

842

result.data.u_atom = atom;

843

return result;

844

}

845

846

847

TextElement TextElement::CharClass(

848

RegExpCharacterClass* char_class) {

849

TextElement result = TextElement(CHAR_CLASS);

850

result.data.u_char_class = char_class;

851

return result;

852

}

853

854

855

int TextElement::length() {

856

if (type == ATOM) {

857

return data.u_atom->length();

858

} else {

859

ASSERT(type == CHAR_CLASS);

860

return 1;

861

}

862

}

863

864

865

DispatchTable* ChoiceNode::GetTable(bool ignore_case) {

866

if (table_ == NULL) {

867

table_ = new(zone()) DispatchTable(zone());

868

DispatchTableConstructor cons(table_, ignore_case, zone());

869

cons.BuildTable(this);

870

}

871

return table_;

872

}

873

874

875

class FrequencyCollator {

876

public:

877

FrequencyCollator() : total_samples_(0) {

878

for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) {

879

frequencies_[i] = CharacterFrequency(i);

880

}

881

}

882

883

void CountCharacter(int character) {

884

int index = (character & RegExpMacroAssembler::kTableMask);

885

frequencies_[index].Increment();

886

total_samples_++;

887

}

888

889

// Does not measure in percent, but rather per-128 (the table size from the

890

// regexp macro assembler).

891

int Frequency(int in_character) {

892

ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character);

893

if (total_samples_ < 1) return 1; // Division by zero.

894

int freq_in_per128 =

895

(frequencies_[in_character].counter() * 128) / total_samples_;

896

return freq_in_per128;

897

}

898

899

private:

900

class CharacterFrequency {

901

public:

902

CharacterFrequency() : counter_(0), character_(-1) { }

903

explicit CharacterFrequency(int character)

904

: counter_(0), character_(character) { }

905

906

void Increment() { counter_++; }

907

int counter() { return counter_; }

908

int character() { return character_; }

909

910

private:

911

int counter_;

912

int character_;

913

};

914

915

916

private:

917

CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];

918

int total_samples_;

919

};

920

921

922

class RegExpCompiler {

923

public:

924

RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii,

925

Zone* zone);

926

927

int AllocateRegister() {

928

if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {

929

reg_exp_too_big_ = true;

930

return next_register_;

931

}

932

return next_register_++;

933

}

934

935

RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,

936

RegExpNode* start,

937

int capture_count,

938

Handle<String> pattern);

939

940

inline void AddWork(RegExpNode* node) { work_list_->Add(node); }

941

942

static const int kImplementationOffset = 0;

943

static const int kNumberOfRegistersOffset = 0;

944

static const int kCodeOffset = 1;

945

946

RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }

947

EndNode* accept() { return accept_; }

948

949

static const int kMaxRecursion = 100;

950

inline int recursion_depth() { return recursion_depth_; }

951

inline void IncrementRecursionDepth() { recursion_depth_++; }

952

inline void DecrementRecursionDepth() { recursion_depth_--; }

953

954

void SetRegExpTooBig() { reg_exp_too_big_ = true; }

955

956

inline bool ignore_case() { return ignore_case_; }

957

inline bool ascii() { return ascii_; }

958

FrequencyCollator* frequency_collator() { return &frequency_collator_; }

959

960

int current_expansion_factor() { return current_expansion_factor_; }

961

void set_current_expansion_factor(int value) {

962

current_expansion_factor_ = value;

963

}

964

965

Zone* zone() const { return zone_; }

966

967

static const int kNoRegister = -1;

968

969

private:

970

EndNode* accept_;

971

int next_register_;

972

List<RegExpNode*>* work_list_;

973

int recursion_depth_;

974

RegExpMacroAssembler* macro_assembler_;

975

bool ignore_case_;

976

bool ascii_;

977

bool reg_exp_too_big_;

978

int current_expansion_factor_;

979

FrequencyCollator frequency_collator_;

980

Zone* zone_;

981

};

982

983

984

class RecursionCheck {

985

public:

986

explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {

987

compiler->IncrementRecursionDepth();

988

}

989

~RecursionCheck() { compiler_->DecrementRecursionDepth(); }

990

private:

991

RegExpCompiler* compiler_;

992

};

993

994

995

static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {

996

return RegExpEngine::CompilationResult("RegExp too big");

997

}

998

999

1000

// Attempts to compile the regexp using an Irregexp code generator. Returns

1001

// a fixed array or a null handle depending on whether it succeeded.

1002

RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii,

1003

Zone* zone)

1004

: next_register_(2 * (capture_count + 1)),

1005

work_list_(NULL),

1006

recursion_depth_(0),

1007

ignore_case_(ignore_case),

1008

ascii_(ascii),

1009

reg_exp_too_big_(false),

1010

current_expansion_factor_(1),

1011

frequency_collator_(),

1012

zone_(zone) {

1013

accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);

1014

ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);

1015

}

1016

1017

1018

RegExpEngine::CompilationResult RegExpCompiler::Assemble(

1019

RegExpMacroAssembler* macro_assembler,

1020

RegExpNode* start,

1021

int capture_count,

1022

Handle<String> pattern) {

1023

Heap* heap = pattern->GetHeap();

1024

1025

bool use_slow_safe_regexp_compiler = false;

1026

if (heap->total_regexp_code_generated() >

1027

RegExpImpl::kRegWxpCompiledLimit &&

1028

heap->isolate()->memory_allocator()->SizeExecutable() >

1029

RegExpImpl::kRegExpExecutableMemoryLimit) {

1030

use_slow_safe_regexp_compiler = true;

1031

}

1032

1033

macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);

1034

1035

#ifdef DEBUG

1036

if (FLAG_trace_regexp_assembler)

1037

macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);

1038

else

1039

#endif

1040

macro_assembler_ = macro_assembler;

1041

1042

List <RegExpNode*> work_list(0);

1043

work_list_ = &work_list;

1044

Label fail;

1045

macro_assembler_->PushBacktrack(&fail);

1046

Trace new_trace;

1047

start->Emit(this, &new_trace);

1048

macro_assembler_->Bind(&fail);

1049

macro_assembler_->Fail();

1050

while (!work_list.is_empty()) {

1051

work_list.RemoveLast()->Emit(this, &new_trace);

1052

}

1053

if (reg_exp_too_big_) return IrregexpRegExpTooBig();

1054

1055

Handle<HeapObject> code = macro_assembler_->GetCode(pattern);

1056

heap->IncreaseTotalRegexpCodeGenerated(code->Size());

1057

work_list_ = NULL;

1058

#ifdef DEBUG

1059

if (FLAG_print_code) {

1060

Handle<Code>::cast(code)->Disassemble(*pattern->ToCString());

1061

}

1062

if (FLAG_trace_regexp_assembler) {

1063

delete macro_assembler_;

1064

}

1065

#endif

1066

return RegExpEngine::CompilationResult(*code, next_register_);

1067

}

1068

1069

1070

bool Trace::DeferredAction::Mentions(int that) {

1071

if (type() == ActionNode::CLEAR_CAPTURES) {

1072

Interval range = static_cast<DeferredClearCaptures*>(this)->range();

1073

return range.Contains(that);

1074

} else {

1075

return reg() == that;

1076

}

1077

}

1078

1079

1080

bool Trace::mentions_reg(int reg) {

1081

for (DeferredAction* action = actions_;

1082

action != NULL;

1083

action = action->next()) {

1084

if (action->Mentions(reg))

1085

return true;

1086

}

1087

return false;

1088

}

1089

1090

1091

bool Trace::GetStoredPosition(int reg, int* cp_offset) {

1092

ASSERT_EQ(0, *cp_offset);

1093

for (DeferredAction* action = actions_;

1094

action != NULL;

1095

action = action->next()) {

1096

if (action->Mentions(reg)) {

1097

if (action->type() == ActionNode::STORE_POSITION) {

1098

*cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();

1099

return true;

1100

} else {

1101

return false;

1102

}

1103

}

1104

}

1105

return false;

1106

}

1107

1108

1109

int Trace::FindAffectedRegisters(OutSet* affected_registers,

1110

Zone* zone) {

1111

int max_register = RegExpCompiler::kNoRegister;

1112

for (DeferredAction* action = actions_;

1113

action != NULL;

1114

action = action->next()) {

1115

if (action->type() == ActionNode::CLEAR_CAPTURES) {

1116

Interval range = static_cast<DeferredClearCaptures*>(action)->range();

1117

for (int i = range.from(); i <= range.to(); i++)

1118

affected_registers->Set(i, zone);

1119

if (range.to() > max_register) max_register = range.to();

1120

} else {

1121

affected_registers->Set(action->reg(), zone);

1122

if (action->reg() > max_register) max_register = action->reg();

1123

}

1124

}

1125

return max_register;

1126

}

1127

1128

1129

void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,

1130

int max_register,

1131

OutSet& registers_to_pop,

1132

OutSet& registers_to_clear) {

1133

for (int reg = max_register; reg >= 0; reg--) {

1134

if (registers_to_pop.Get(reg)) assembler->PopRegister(reg);

1135

else if (registers_to_clear.Get(reg)) {

1136

int clear_to = reg;

1137

while (reg > 0 && registers_to_clear.Get(reg - 1)) {

1138

reg--;

1139

}

1140

assembler->ClearRegisters(reg, clear_to);

1141

}

1142

}

1143

}

1144

1145

1146

void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,

1147

int max_register,

1148

OutSet& affected_registers,

1149

OutSet* registers_to_pop,

1150

OutSet* registers_to_clear,

1151

Zone* zone) {

1152

// The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.

1153

const int push_limit = (assembler->stack_limit_slack() + 1) / 2;

1154

1155

// Count pushes performed to force a stack limit check occasionally.

1156

int pushes = 0;

1157

1158

for (int reg = 0; reg <= max_register; reg++) {

1159

if (!affected_registers.Get(reg)) {

1160

continue;

1161

}

1162

1163

// The chronologically first deferred action in the trace

1164

// is used to infer the action needed to restore a register

1165

// to its previous state (or not, if it's safe to ignore it).

1166

enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };

1167

DeferredActionUndoType undo_action = IGNORE;

1168

1169

int value = 0;

1170

bool absolute = false;

1171

bool clear = false;

1172

int store_position = -1;

1173

// This is a little tricky because we are scanning the actions in reverse

1174

// historical order (newest first).

1175

for (DeferredAction* action = actions_;

1176

action != NULL;

1177

action = action->next()) {

1178

if (action->Mentions(reg)) {

1179

switch (action->type()) {

1180

case ActionNode::SET_REGISTER: {

1181

Trace::DeferredSetRegister* psr =

1182

static_cast<Trace::DeferredSetRegister*>(action);

1183

if (!absolute) {

1184

value += psr->value();

1185

absolute = true;

1186

}

1187

// SET_REGISTER is currently only used for newly introduced loop

1188

// counters. They can have a significant previous value if they

1189

// occour in a loop. TODO(lrn): Propagate this information, so

1190

// we can set undo_action to IGNORE if we know there is no value to

1191

// restore.

1192

undo_action = RESTORE;

1193

ASSERT_EQ(store_position, -1);

1194

ASSERT(!clear);

1195

break;

1196

}

1197

case ActionNode::INCREMENT_REGISTER:

1198

if (!absolute) {

1199

value++;

1200

}

1201

ASSERT_EQ(store_position, -1);

1202

ASSERT(!clear);

1203

undo_action = RESTORE;

1204

break;

1205

case ActionNode::STORE_POSITION: {

1206

Trace::DeferredCapture* pc =

1207

static_cast<Trace::DeferredCapture*>(action);

1208

if (!clear && store_position == -1) {

1209

store_position = pc->cp_offset();

1210

}

1211

1212

// For captures we know that stores and clears alternate.

1213

// Other register, are never cleared, and if the occur

1214

// inside a loop, they might be assigned more than once.

1215

if (reg <= 1) {

1216

// Registers zero and one, aka "capture zero", is

1217

// always set correctly if we succeed. There is no

1218

// need to undo a setting on backtrack, because we

1219

// will set it again or fail.

1220

undo_action = IGNORE;

1221

} else {

1222

undo_action = pc->is_capture() ? CLEAR : RESTORE;

1223

}

1224

ASSERT(!absolute);

1225

ASSERT_EQ(value, 0);

1226

break;

1227

}

1228

case ActionNode::CLEAR_CAPTURES: {

1229

// Since we're scanning in reverse order, if we've already

1230

// set the position we have to ignore historically earlier

1231

// clearing operations.

1232

if (store_position == -1) {

1233

clear = true;

1234

}

1235

undo_action = RESTORE;

1236

ASSERT(!absolute);

1237

ASSERT_EQ(value, 0);

1238

break;

1239

}

1240

default:

1241

UNREACHABLE();

1242

break;

1243

}

1244

}

1245

}

1246

// Prepare for the undo-action (e.g., push if it's going to be popped).

1247

if (undo_action == RESTORE) {

1248

pushes++;

1249

RegExpMacroAssembler::StackCheckFlag stack_check =

1250

RegExpMacroAssembler::kNoStackLimitCheck;

1251

if (pushes == push_limit) {

1252

stack_check = RegExpMacroAssembler::kCheckStackLimit;

1253

pushes = 0;

1254

}

1255

1256

assembler->PushRegister(reg, stack_check);

1257

registers_to_pop->Set(reg, zone);

1258

} else if (undo_action == CLEAR) {

1259

registers_to_clear->Set(reg, zone);

1260

}

1261

// Perform the chronologically last action (or accumulated increment)

1262

// for the register.

1263

if (store_position != -1) {

1264

assembler->WriteCurrentPositionToRegister(reg, store_position);

1265

} else if (clear) {

1266

assembler->ClearRegisters(reg, reg);

1267

} else if (absolute) {

1268

assembler->SetRegister(reg, value);

1269

} else if (value != 0) {

1270

assembler->AdvanceRegister(reg, value);

1271

}

1272

}

1273

}

1274

1275

1276

// This is called as we come into a loop choice node and some other tricky

1277

// nodes. It normalizes the state of the code generator to ensure we can

1278

// generate generic code.

1279

void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {

1280

RegExpMacroAssembler* assembler = compiler->macro_assembler();

1281

1282

ASSERT(!is_trivial());

1283

1284

if (actions_ == NULL && backtrack() == NULL) {

1285

// Here we just have some deferred cp advances to fix and we are back to

1286

// a normal situation. We may also have to forget some information gained

1287

// through a quick check that was already performed.

1288

if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);

1289

// Create a new trivial state and generate the node with that.

1290

Trace new_state;

1291

successor->Emit(compiler, &new_state);

1292

return;

1293

}

1294

1295

// Generate deferred actions here along with code to undo them again.

1296

OutSet affected_registers;

1297

1298

if (backtrack() != NULL) {

1299

// Here we have a concrete backtrack location. These are set up by choice

1300

// nodes and so they indicate that we have a deferred save of the current

1301

// position which we may need to emit here.

1302

assembler->PushCurrentPosition();

1303

}

1304

1305

int max_register = FindAffectedRegisters(&affected_registers,

1306

compiler->zone());

1307

OutSet registers_to_pop;

1308

OutSet registers_to_clear;

1309

PerformDeferredActions(assembler,

1310

max_register,

1311

affected_registers,

1312

&registers_to_pop,

1313

&registers_to_clear,

1314

compiler->zone());

1315

if (cp_offset_ != 0) {

1316

assembler->AdvanceCurrentPosition(cp_offset_);

1317

}

1318

1319

// Create a new trivial state and generate the node with that.

1320

Label undo;

1321

assembler->PushBacktrack(&undo);

1322

Trace new_state;

1323

successor->Emit(compiler, &new_state);

1324

1325

// On backtrack we need to restore state.

1326

assembler->Bind(&undo);

1327

RestoreAffectedRegisters(assembler,

1328

max_register,

1329

registers_to_pop,

1330

registers_to_clear);

1331

if (backtrack() == NULL) {

1332

assembler->Backtrack();

1333

} else {

1334

assembler->PopCurrentPosition();

1335

assembler->GoTo(backtrack());

1336

}

1337

}

1338

1339

1340

void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {

1341

RegExpMacroAssembler* assembler = compiler->macro_assembler();

1342

1343

// Omit flushing the trace. We discard the entire stack frame anyway.

1344

1345

if (!label()->is_bound()) {

1346

// We are completely independent of the trace, since we ignore it,

1347

// so this code can be used as the generic version.

1348

assembler->Bind(label());

1349

}

1350

1351

// Throw away everything on the backtrack stack since the start

1352

// of the negative submatch and restore the character position.

1353

assembler->ReadCurrentPositionFromRegister(current_position_register_);

1354

assembler->ReadStackPointerFromRegister(stack_pointer_register_);

1355

if (clear_capture_count_ > 0) {

1356

// Clear any captures that might have been performed during the success

1357

// of the body of the negative look-ahead.

1358

int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;

1359

assembler->ClearRegisters(clear_capture_start_, clear_capture_end);

1360

}

1361

// Now that we have unwound the stack we find at the top of the stack the

1362

// backtrack that the BeginSubmatch node got.

1363

assembler->Backtrack();

1364

}

1365

1366

1367

void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {

1368

if (!trace->is_trivial()) {

1369

trace->Flush(compiler, this);

1370

return;

1371

}

1372

RegExpMacroAssembler* assembler = compiler->macro_assembler();

1373

if (!label()->is_bound()) {

1374

assembler->Bind(label());

1375

}

1376

switch (action_) {

1377

case ACCEPT:

1378

assembler->Succeed();

1379

return;

1380

case BACKTRACK:

1381

assembler->GoTo(trace->backtrack());

1382

return;

1383

case NEGATIVE_SUBMATCH_SUCCESS:

1384

// This case is handled in a different virtual method.

1385

UNREACHABLE();

1386

}

1387

UNIMPLEMENTED();

1388

}

1389

1390

1391

void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {

1392

if (guards_ == NULL)

1393

guards_ = new(zone) ZoneList<Guard*>(1, zone);

1394

guards_->Add(guard, zone);

1395

}

1396

1397

1398

ActionNode* ActionNode::SetRegister(int reg,

1399

int val,

1400

RegExpNode* on_success) {

1401

ActionNode* result =

1402

new(on_success->zone()) ActionNode(SET_REGISTER, on_success);

1403

result->data_.u_store_register.reg = reg;

1404

result->data_.u_store_register.value = val;

1405

return result;

1406

}

1407

1408

1409

ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {

1410

ActionNode* result =

1411

new(on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success);

1412

result->data_.u_increment_register.reg = reg;

1413

return result;

1414

}

1415

1416

1417

ActionNode* ActionNode::StorePosition(int reg,

1418

bool is_capture,

1419

RegExpNode* on_success) {

1420

ActionNode* result =

1421

new(on_success->zone()) ActionNode(STORE_POSITION, on_success);

1422

result->data_.u_position_register.reg = reg;

1423

result->data_.u_position_register.is_capture = is_capture;

1424

return result;

1425

}

1426

1427

1428

ActionNode* ActionNode::ClearCaptures(Interval range,

1429

RegExpNode* on_success) {

1430

ActionNode* result =

1431

new(on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success);

1432

result->data_.u_clear_captures.range_from = range.from();

1433

result->data_.u_clear_captures.range_to = range.to();

1434

return result;

1435

}

1436

1437

1438

ActionNode* ActionNode::BeginSubmatch(int stack_reg,

1439

int position_reg,

1440

RegExpNode* on_success) {

1441

ActionNode* result =

1442

new(on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success);

1443

result->data_.u_submatch.stack_pointer_register = stack_reg;

1444

result->data_.u_submatch.current_position_register = position_reg;

1445

return result;

1446

}

1447

1448

1449

ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,

1450

int position_reg,

1451

int clear_register_count,

1452

int clear_register_from,

1453

RegExpNode* on_success) {

1454

ActionNode* result =

1455

new(on_success->zone()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);

1456

result->data_.u_submatch.stack_pointer_register = stack_reg;

1457

result->data_.u_submatch.current_position_register = position_reg;

1458

result->data_.u_submatch.clear_register_count = clear_register_count;

1459

result->data_.u_submatch.clear_register_from = clear_register_from;

1460

return result;

1461

}

1462

1463

1464

ActionNode* ActionNode::EmptyMatchCheck(int start_register,

1465

int repetition_register,

1466

int repetition_limit,

1467

RegExpNode* on_success) {

1468

ActionNode* result =

1469

new(on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success);

1470

result->data_.u_empty_match_check.start_register = start_register;

1471

result->data_.u_empty_match_check.repetition_register = repetition_register;

1472

result->data_.u_empty_match_check.repetition_limit = repetition_limit;

1473

return result;

1474

}

1475

1476

1477

#define DEFINE_ACCEPT(Type) \

1478

void Type##Node::Accept(NodeVisitor* visitor) { \

1479

visitor->Visit##Type(this); \

1480

}

1481

FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)

1482

#undef DEFINE_ACCEPT

1483

1484

1485

void LoopChoiceNode::Accept(NodeVisitor* visitor) {

1486

visitor->VisitLoopChoice(this);

1487

}

1488

1489

1490

// -------------------------------------------------------------------

1491

// Emit code.

1492

1493

1494

void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,

1495

Guard* guard,

1496

Trace* trace) {

1497

switch (guard->op()) {

1498

case Guard::LT:

1499

ASSERT(!trace->mentions_reg(guard->reg()));

1500

macro_assembler->IfRegisterGE(guard->reg(),

1501

guard->value(),

1502

trace->backtrack());

1503

break;

1504

case Guard::GEQ:

1505

ASSERT(!trace->mentions_reg(guard->reg()));

1506

macro_assembler->IfRegisterLT(guard->reg(),

1507

guard->value(),

1508

trace->backtrack());

1509

break;

1510

}

1511

}

1512

1513

1514

// Returns the number of characters in the equivalence class, omitting those

1515

// that cannot occur in the source string because it is ASCII.

1516

static int GetCaseIndependentLetters(Isolate* isolate,

1517

uc16 character,

1518

bool ascii_subject,

1519

unibrow::uchar* letters) {

1520

int length =

1521

isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);

1522

// Unibrow returns 0 or 1 for characters where case independence is

1523

// trivial.

1524

if (length == 0) {

1525

letters[0] = character;

1526

length = 1;

1527

}

1528

if (!ascii_subject || character <= String::kMaxAsciiCharCode) {

1529

return length;

1530

}

1531

// The standard requires that non-ASCII characters cannot have ASCII

1532

// character codes in their equivalence class.

1533

return 0;

1534

}

1535

1536

1537

static inline bool EmitSimpleCharacter(Isolate* isolate,

1538

RegExpCompiler* compiler,

1539

uc16 c,

1540

Label* on_failure,

1541

int cp_offset,

1542

bool check,

1543

bool preloaded) {

1544

RegExpMacroAssembler* assembler = compiler->macro_assembler();

1545

bool bound_checked = false;

1546

if (!preloaded) {

1547

assembler->LoadCurrentCharacter(

1548

cp_offset,

1549

on_failure,

1550

check);

1551

bound_checked = true;

1552

}

1553

assembler->CheckNotCharacter(c, on_failure);

1554

return bound_checked;

1555

}

1556

1557

1558

// Only emits non-letters (things that don't have case). Only used for case

1559

// independent matches.

1560

static inline bool EmitAtomNonLetter(Isolate* isolate,

1561

RegExpCompiler* compiler,

1562

uc16 c,

1563

Label* on_failure,

1564

int cp_offset,

1565

bool check,

1566

bool preloaded) {

1567

RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

1568

bool ascii = compiler->ascii();

1569

unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

1570

int length = GetCaseIndependentLetters(isolate, c, ascii, chars);

1571

if (length < 1) {

1572

// This can't match. Must be an ASCII subject and a non-ASCII character.

1573

// We do not need to do anything since the ASCII pass already handled this.

1574

return false; // Bounds not checked.

1575

}

1576

bool checked = false;

1577

// We handle the length > 1 case in a later pass.

1578

if (length == 1) {

1579

if (ascii && c > String::kMaxAsciiCharCodeU) {

1580

// Can't match - see above.

1581

return false; // Bounds not checked.

1582

}

1583

if (!preloaded) {

1584

macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

1585

checked = check;

1586

}

1587

macro_assembler->CheckNotCharacter(c, on_failure);

1588

}

1589

return checked;

1590

}

1591

1592

1593

static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,

1594

bool ascii,

1595

uc16 c1,

1596

uc16 c2,

1597

Label* on_failure) {

1598

uc16 char_mask;

1599

if (ascii) {

1600

char_mask = String::kMaxAsciiCharCode;

1601

} else {

1602

char_mask = String::kMaxUtf16CodeUnit;

1603

}

1604

uc16 exor = c1 ^ c2;

1605

// Check whether exor has only one bit set.

1606

if (((exor - 1) & exor) == 0) {

1607

// If c1 and c2 differ only by one bit.

1608

// Ecma262UnCanonicalize always gives the highest number last.

1609

ASSERT(c2 > c1);

1610

uc16 mask = char_mask ^ exor;

1611

macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);

1612

return true;

1613

}

1614

ASSERT(c2 > c1);

1615

uc16 diff = c2 - c1;

1616

if (((diff - 1) & diff) == 0 && c1 >= diff) {

1617

// If the characters differ by 2^n but don't differ by one bit then

1618

// subtract the difference from the found character, then do the or

1619

// trick. We avoid the theoretical case where negative numbers are

1620

// involved in order to simplify code generation.

1621

uc16 mask = char_mask ^ diff;

1622

macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,

1623

diff,

1624

mask,

1625

on_failure);

1626

return true;

1627

}

1628

return false;

1629

}

1630

1631

1632

typedef bool EmitCharacterFunction(Isolate* isolate,

1633

RegExpCompiler* compiler,

1634

uc16 c,

1635

Label* on_failure,

1636

int cp_offset,

1637

bool check,

1638

bool preloaded);

1639

1640

// Only emits letters (things that have case). Only used for case independent

1641

// matches.

1642

static inline bool EmitAtomLetter(Isolate* isolate,

1643

RegExpCompiler* compiler,

1644

uc16 c,

1645

Label* on_failure,

1646

int cp_offset,

1647

bool check,

1648

bool preloaded) {

1649

RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

1650

bool ascii = compiler->ascii();

1651

unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

1652

int length = GetCaseIndependentLetters(isolate, c, ascii, chars);

1653

if (length <= 1) return false;

1654

// We may not need to check against the end of the input string

1655

// if this character lies before a character that matched.

1656

if (!preloaded) {

1657

macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

1658

}

1659

Label ok;

1660

ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);

1661

switch (length) {

1662

case 2: {

1663

if (ShortCutEmitCharacterPair(macro_assembler,

1664

ascii,

1665

chars[0],

1666

chars[1],

1667

on_failure)) {

1668

} else {

1669

macro_assembler->CheckCharacter(chars[0], &ok);

1670

macro_assembler->CheckNotCharacter(chars[1], on_failure);

1671

macro_assembler->Bind(&ok);

1672

}

1673

break;

1674

}

1675

case 4:

1676

macro_assembler->CheckCharacter(chars[3], &ok);

1677

// Fall through!

1678

case 3:

1679

macro_assembler->CheckCharacter(chars[0], &ok);

1680

macro_assembler->CheckCharacter(chars[1], &ok);

1681

macro_assembler->CheckNotCharacter(chars[2], on_failure);

1682

macro_assembler->Bind(&ok);

1683

break;

1684

default:

1685

UNREACHABLE();

1686

break;

1687

}

1688

return true;

1689

}

1690

1691

1692

static void EmitBoundaryTest(RegExpMacroAssembler* masm,

1693

int border,

1694

Label* fall_through,

1695

Label* above_or_equal,

1696

Label* below) {

1697

if (below != fall_through) {

1698

masm->CheckCharacterLT(border, below);

1699

if (above_or_equal != fall_through) masm->GoTo(above_or_equal);

1700

} else {

1701

masm->CheckCharacterGT(border - 1, above_or_equal);

1702

}

1703

}

1704

1705

1706

static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,

1707

int first,

1708

int last,

1709

Label* fall_through,

1710

Label* in_range,

1711

Label* out_of_range) {

1712

if (in_range == fall_through) {

1713

if (first == last) {

1714

masm->CheckNotCharacter(first, out_of_range);

1715

} else {

1716

masm->CheckCharacterNotInRange(first, last, out_of_range);

1717

}

1718

} else {

1719

if (first == last) {

1720

masm->CheckCharacter(first, in_range);

1721

} else {

1722

masm->CheckCharacterInRange(first, last, in_range);

1723

}

1724

if (out_of_range != fall_through) masm->GoTo(out_of_range);

1725

}

1726

}

1727

1728

1729

// even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.

1730

// odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.

1731

static void EmitUseLookupTable(

1732

RegExpMacroAssembler* masm,

1733

ZoneList<int>* ranges,

1734

int start_index,

1735

int end_index,

1736

int min_char,

1737

Label* fall_through,

1738

Label* even_label,

1739

Label* odd_label) {

1740

static const int kSize = RegExpMacroAssembler::kTableSize;

1741

static const int kMask = RegExpMacroAssembler::kTableMask;

1742

1743

int base = (min_char & ~kMask);

1744

USE(base);

1745

1746

// Assert that everything is on one kTableSize page.

1747

for (int i = start_index; i <= end_index; i++) {

1748

ASSERT_EQ(ranges->at(i) & ~kMask, base);

1749

}

1750

ASSERT(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base);

1751

1752

char templ[kSize];

1753

Label* on_bit_set;

1754

Label* on_bit_clear;

1755

int bit;

1756

if (even_label == fall_through) {

1757

on_bit_set = odd_label;

1758

on_bit_clear = even_label;

1759

bit = 1;

1760

} else {

1761

on_bit_set = even_label;

1762

on_bit_clear = odd_label;

1763

bit = 0;

1764

}

1765

for (int i = 0; i < (ranges->at(start_index) & kMask) && i < kSize; i++) {

1766

templ[i] = bit;

1767

}

1768

int j = 0;

1769

bit ^= 1;

1770

for (int i = start_index; i < end_index; i++) {

1771

for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) {

1772

templ[j] = bit;

1773

}

1774

bit ^= 1;

1775

}

1776

for (int i = j; i < kSize; i++) {

1777

templ[i] = bit;

1778

}

1779

// TODO(erikcorry): Cache these.

1780

Handle<ByteArray> ba = FACTORY->NewByteArray(kSize, TENURED);

1781

for (int i = 0; i < kSize; i++) {

1782

ba->set(i, templ[i]);

1783

}

1784

masm->CheckBitInTable(ba, on_bit_set);

1785

if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);

1786

}

1787

1788

1789

static void CutOutRange(RegExpMacroAssembler* masm,

1790

ZoneList<int>* ranges,

1791

int start_index,

1792

int end_index,

1793

int cut_index,

1794

Label* even_label,

1795

Label* odd_label) {

1796

bool odd = (((cut_index - start_index) & 1) == 1);

1797

Label* in_range_label = odd ? odd_label : even_label;

1798

Label dummy;

1799

EmitDoubleBoundaryTest(masm,

1800

ranges->at(cut_index),

1801

ranges->at(cut_index + 1) - 1,

1802

&dummy,

1803

in_range_label,

1804

&dummy);

1805

ASSERT(!dummy.is_linked());

1806

// Cut out the single range by rewriting the array. This creates a new

1807

// range that is a merger of the two ranges on either side of the one we

1808

// are cutting out. The oddity of the labels is preserved.

1809

for (int j = cut_index; j > start_index; j--) {

1810

ranges->at(j) = ranges->at(j - 1);

1811

}

1812

for (int j = cut_index + 1; j < end_index; j++) {

1813

ranges->at(j) = ranges->at(j + 1);

1814

}

1815

}

1816

1817

1818

// Unicode case. Split the search space into kSize spaces that are handled

1819

// with recursion.

1820

static void SplitSearchSpace(ZoneList<int>* ranges,

1821

int start_index,

1822

int end_index,

1823

int* new_start_index,

1824

int* new_end_index,

1825

int* border) {

1826

static const int kSize = RegExpMacroAssembler::kTableSize;

1827

static const int kMask = RegExpMacroAssembler::kTableMask;

1828

1829

int first = ranges->at(start_index);

1830

int last = ranges->at(end_index) - 1;

1831

1832

*new_start_index = start_index;

1833

*border = (ranges->at(start_index) & ~kMask) + kSize;

1834

while (*new_start_index < end_index) {

1835

if (ranges->at(*new_start_index) > *border) break;

1836

(*new_start_index)++;

1837

}

1838

// new_start_index is the index of the first edge that is beyond the

1839

// current kSize space.

1840

1841

// For very large search spaces we do a binary chop search of the non-ASCII

1842

// space instead of just going to the end of the current kSize space. The

1843

// heuristics are complicated a little by the fact that any 128-character

1844

// encoding space can be quickly tested with a table lookup, so we don't

1845

// wish to do binary chop search at a smaller granularity than that. A

1846

// 128-character space can take up a lot of space in the ranges array if,

1847

// for example, we only want to match every second character (eg. the lower

1848

// case characters on some Unicode pages).

1849

int binary_chop_index = (end_index + start_index) / 2;

1850

// The first test ensures that we get to the code that handles the ASCII

1851

// range with a single not-taken branch, speeding up this important

1852

// character range (even non-ASCII charset-based text has spaces and

1853

// punctuation).

1854

if (*border - 1 > String::kMaxAsciiCharCode && // ASCII case.

1855

end_index - start_index > (*new_start_index - start_index) * 2 &&

1856

last - first > kSize * 2 &&

1857

binary_chop_index > *new_start_index &&

1858

ranges->at(binary_chop_index) >= first + 2 * kSize) {

1859

int scan_forward_for_section_border = binary_chop_index;;

1860

int new_border = (ranges->at(binary_chop_index) | kMask) + 1;

1861

1862

while (scan_forward_for_section_border < end_index) {

1863

if (ranges->at(scan_forward_for_section_border) > new_border) {

1864

*new_start_index = scan_forward_for_section_border;

1865

*border = new_border;

1866

break;

1867

}

1868

scan_forward_for_section_border++;

1869

}

1870

}

1871

1872

ASSERT(*new_start_index > start_index);

1873

*new_end_index = *new_start_index - 1;

1874

if (ranges->at(*new_end_index) == *border) {

1875

(*new_end_index)--;

1876

}

1877

if (*border >= ranges->at(end_index)) {

1878

*border = ranges->at(end_index);

1879

*new_start_index = end_index; // Won't be used.

1880

*new_end_index = end_index - 1;

1881

}

1882

}

1883

1884

1885

// Gets a series of segment boundaries representing a character class. If the

1886

// character is in the range between an even and an odd boundary (counting from

1887

// start_index) then go to even_label, otherwise go to odd_label. We already

1888

// know that the character is in the range of min_char to max_char inclusive.

1889

// Either label can be NULL indicating backtracking. Either label can also be

1890

// equal to the fall_through label.

1891

static void GenerateBranches(RegExpMacroAssembler* masm,

1892

ZoneList<int>* ranges,

1893

int start_index,

1894

int end_index,

1895

uc16 min_char,

1896

uc16 max_char,

1897

Label* fall_through,

1898

Label* even_label,

1899

Label* odd_label) {

1900

int first = ranges->at(start_index);

1901

int last = ranges->at(end_index) - 1;

1902

1903

ASSERT_LT(min_char, first);

1904

1905

// Just need to test if the character is before or on-or-after

1906

// a particular character.

1907

if (start_index == end_index) {

1908

EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);

1909

return;

1910

}

1911

1912

// Another almost trivial case: There is one interval in the middle that is

1913

// different from the end intervals.

1914

if (start_index + 1 == end_index) {

1915

EmitDoubleBoundaryTest(

1916

masm, first, last, fall_through, even_label, odd_label);

1917

return;

1918

}

1919

1920

// It's not worth using table lookup if there are very few intervals in the

1921

// character class.

1922

if (end_index - start_index <= 6) {

1923

// It is faster to test for individual characters, so we look for those

1924

// first, then try arbitrary ranges in the second round.

1925

static int kNoCutIndex = -1;

1926

int cut = kNoCutIndex;

1927

for (int i = start_index; i < end_index; i++) {

1928

if (ranges->at(i) == ranges->at(i + 1) - 1) {

1929

cut = i;

1930

break;

1931

}

1932

}

1933

if (cut == kNoCutIndex) cut = start_index;

1934

CutOutRange(

1935

masm, ranges, start_index, end_index, cut, even_label, odd_label);

1936

ASSERT_GE(end_index - start_index, 2);

1937

GenerateBranches(masm,

1938

ranges,

1939

start_index + 1,

1940

end_index - 1,

1941

min_char,

1942

max_char,

1943

fall_through,

1944

even_label,

1945

odd_label);

1946

return;

1947

}

1948

1949

// If there are a lot of intervals in the regexp, then we will use tables to

1950

// determine whether the character is inside or outside the character class.

1951

static const int kBits = RegExpMacroAssembler::kTableSizeBits;

1952

1953

if ((max_char >> kBits) == (min_char >> kBits)) {

1954

EmitUseLookupTable(masm,

1955

ranges,

1956

start_index,

1957

end_index,

1958

min_char,

1959

fall_through,

1960

even_label,

1961

odd_label);

1962

return;

1963

}

1964

1965

if ((min_char >> kBits) != (first >> kBits)) {

1966

masm->CheckCharacterLT(first, odd_label);

1967

GenerateBranches(masm,

1968

ranges,

1969

start_index + 1,

1970

end_index,

1971

first,

1972

max_char,

1973

fall_through,

1974

odd_label,

1975

even_label);

1976

return;

1977

}

1978

1979

int new_start_index = 0;

1980

int new_end_index = 0;

1981

int border = 0;

1982

1983

SplitSearchSpace(ranges,

1984

start_index,

1985

end_index,

1986

&new_start_index,

1987

&new_end_index,

1988

&border);

1989

1990

Label handle_rest;

1991

Label* above = &handle_rest;

1992

if (border == last + 1) {

1993

// We didn't find any section that started after the limit, so everything

1994

// above the border is one of the terminal labels.

1995

above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;

1996

ASSERT(new_end_index == end_index - 1);

1997

}

1998

1999

ASSERT_LE(start_index, new_end_index);

2000

ASSERT_LE(new_start_index, end_index);

2001

ASSERT_LT(start_index, new_start_index);

2002

ASSERT_LT(new_end_index, end_index);

2003

ASSERT(new_end_index + 1 == new_start_index ||

2004

(new_end_index + 2 == new_start_index &&

2005

border == ranges->at(new_end_index + 1)));

2006

ASSERT_LT(min_char, border - 1);

2007

ASSERT_LT(border, max_char);

2008

ASSERT_LT(ranges->at(new_end_index), border);

2009

ASSERT(border < ranges->at(new_start_index) ||

2010

(border == ranges->at(new_start_index) &&

2011

new_start_index == end_index &&

2012

new_end_index == end_index - 1 &&

2013

border == last + 1));

2014

ASSERT(new_start_index == 0 || border >= ranges->at(new_start_index - 1));

2015

2016

masm->CheckCharacterGT(border - 1, above);

2017

Label dummy;

2018

GenerateBranches(masm,

2019

ranges,

2020

start_index,

2021

new_end_index,

2022

min_char,

2023

border - 1,

2024

&dummy,

2025

even_label,

2026

odd_label);

2027

if (handle_rest.is_linked()) {

2028

masm->Bind(&handle_rest);

2029

bool flip = (new_start_index & 1) != (start_index & 1);

2030

GenerateBranches(masm,

2031

ranges,

2032

new_start_index,

2033

end_index,

2034

border,

2035

max_char,

2036

&dummy,

2037

flip ? odd_label : even_label,

2038

flip ? even_label : odd_label);

2039

}

2040

}

2041

2042

2043

static void EmitCharClass(RegExpMacroAssembler* macro_assembler,

2044

RegExpCharacterClass* cc,

2045

bool ascii,

2046

Label* on_failure,

2047

int cp_offset,

2048

bool check_offset,

2049

bool preloaded,

2050

Zone* zone) {

2051

ZoneList<CharacterRange>* ranges = cc->ranges(zone);

2052

if (!CharacterRange::IsCanonical(ranges)) {

2053

CharacterRange::Canonicalize(ranges);

2054

}

2055

2056

int max_char;

2057

if (ascii) {

2058

max_char = String::kMaxAsciiCharCode;

2059

} else {

2060

max_char = String::kMaxUtf16CodeUnit;

2061

}

2062

2063

int range_count = ranges->length();

2064

2065

int last_valid_range = range_count - 1;

2066

while (last_valid_range >= 0) {

2067

CharacterRange& range = ranges->at(last_valid_range);

2068

if (range.from() <= max_char) {

2069

break;

2070

}

2071

last_valid_range--;

2072

}

2073

2074

if (last_valid_range < 0) {

2075

if (!cc->is_negated()) {

2076

macro_assembler->GoTo(on_failure);

2077

}

2078

if (check_offset) {

2079

macro_assembler->CheckPosition(cp_offset, on_failure);

2080

}

2081

return;

2082

}

2083

2084

if (last_valid_range == 0 &&

2085

ranges->at(0).IsEverything(max_char)) {

2086

if (cc->is_negated()) {

2087

macro_assembler->GoTo(on_failure);

2088

} else {

2089

// This is a common case hit by non-anchored expressions.

2090

if (check_offset) {

2091

macro_assembler->CheckPosition(cp_offset, on_failure);

2092

}

2093

}

2094

return;

2095

}

2096

if (last_valid_range == 0 &&

2097

!cc->is_negated() &&

2098

ranges->at(0).IsEverything(max_char)) {

2099

// This is a common case hit by non-anchored expressions.

2100

if (check_offset) {

2101

macro_assembler->CheckPosition(cp_offset, on_failure);

2102

}

2103

return;

2104

}

2105

2106

if (!preloaded) {

2107

macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);

2108

}

2109

2110

if (cc->is_standard(zone) &&

2111

macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),

2112

on_failure)) {

2113

return;

2114

}

2115

2116

2117

// A new list with ascending entries. Each entry is a code unit

2118

// where there is a boundary between code units that are part of

2119

// the class and code units that are not. Normally we insert an

2120

// entry at zero which goes to the failure label, but if there

2121

// was already one there we fall through for success on that entry.

2122

// Subsequent entries have alternating meaning (success/failure).

2123

ZoneList<int>* range_boundaries =

2124

new(zone) ZoneList<int>(last_valid_range, zone);

2125

2126

bool zeroth_entry_is_failure = !cc->is_negated();

2127

2128

for (int i = 0; i <= last_valid_range; i++) {

2129

CharacterRange& range = ranges->at(i);

2130

if (range.from() == 0) {

2131

ASSERT_EQ(i, 0);

2132

zeroth_entry_is_failure = !zeroth_entry_is_failure;

2133

} else {

2134

range_boundaries->Add(range.from(), zone);

2135

}

2136

range_boundaries->Add(range.to() + 1, zone);

2137

}

2138

int end_index = range_boundaries->length() - 1;

2139

if (range_boundaries->at(end_index) > max_char) {

2140

end_index--;

2141

}

2142

2143

Label fall_through;

2144

GenerateBranches(macro_assembler,

2145

range_boundaries,

2146

0, // start_index.

2147

end_index,

2148

0, // min_char.

2149

max_char,

2150

&fall_through,

2151

zeroth_entry_is_failure ? &fall_through : on_failure,

2152

zeroth_entry_is_failure ? on_failure : &fall_through);

2153

macro_assembler->Bind(&fall_through);

2154

}

2155

2156

2157

RegExpNode::~RegExpNode() {

2158

}

2159

2160

2161

RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,

2162

Trace* trace) {

2163

// If we are generating a greedy loop then don't stop and don't reuse code.

2164

if (trace->stop_node() != NULL) {

2165

return CONTINUE;

2166

}

2167

2168

RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

2169

if (trace->is_trivial()) {

2170

if (label_.is_bound()) {

2171

// We are being asked to generate a generic version, but that's already

2172

// been done so just go to it.

2173

macro_assembler->GoTo(&label_);

2174

return DONE;

2175

}

2176

if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {

2177

// To avoid too deep recursion we push the node to the work queue and just

2178

// generate a goto here.

2179

compiler->AddWork(this);

2180

macro_assembler->GoTo(&label_);

2181

return DONE;

2182

}

2183

// Generate generic version of the node and bind the label for later use.

2184

macro_assembler->Bind(&label_);

2185

return CONTINUE;

2186

}

2187

2188

// We are being asked to make a non-generic version. Keep track of how many

2189

// non-generic versions we generate so as not to overdo it.

2190

trace_count_++;

2191

if (FLAG_regexp_optimization &&

2192

trace_count_ < kMaxCopiesCodeGenerated &&

2193

compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {

2194

return CONTINUE;

2195

}

2196

2197

// If we get here code has been generated for this node too many times or

2198

// recursion is too deep. Time to switch to a generic version. The code for

2199

// generic versions above can handle deep recursion properly.

2200

trace->Flush(compiler, this);

2201

return DONE;

2202

}

2203

2204

2205

int ActionNode::EatsAtLeast(int still_to_find,

2206

int recursion_depth,

2207

bool not_at_start) {

2208

if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;

2209

if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!

2210

return on_success()->EatsAtLeast(still_to_find,

2211

recursion_depth + 1,

2212

not_at_start);

2213

}

2214

2215

2216

void ActionNode::FillInBMInfo(int offset,

2217

int recursion_depth,

2218

int budget,

2219

BoyerMooreLookahead* bm,

2220

bool not_at_start) {

2221

if (type_ == BEGIN_SUBMATCH) {

2222

bm->SetRest(offset);

2223

} else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {

2224

on_success()->FillInBMInfo(

2225

offset, recursion_depth + 1, budget - 1, bm, not_at_start);

2226

}

2227

SaveBMInfo(bm, not_at_start, offset);

2228

}

2229

2230

2231

int AssertionNode::EatsAtLeast(int still_to_find,

2232

int recursion_depth,

2233

bool not_at_start) {

2234

if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;

2235

// If we know we are not at the start and we are asked "how many characters

2236

// will you match if you succeed?" then we can answer anything since false

2237

// implies false. So lets just return the max answer (still_to_find) since

2238

// that won't prevent us from preloading a lot of characters for the other

2239

// branches in the node graph.

2240

if (type() == AT_START && not_at_start) return still_to_find;

2241

return on_success()->EatsAtLeast(still_to_find,

2242

recursion_depth + 1,

2243

not_at_start);

2244

}

2245

2246

2247

void AssertionNode::FillInBMInfo(int offset,

2248

int recursion_depth,

2249

int budget,

2250

BoyerMooreLookahead* bm,

2251

bool not_at_start) {

2252

// Match the behaviour of EatsAtLeast on this node.

2253

if (type() == AT_START && not_at_start) return;

2254

on_success()->FillInBMInfo(

2255

offset, recursion_depth + 1, budget - 1, bm, not_at_start);

2256

SaveBMInfo(bm, not_at_start, offset);

2257

}

2258

2259

2260

int BackReferenceNode::EatsAtLeast(int still_to_find,

2261

int recursion_depth,

2262

bool not_at_start) {

2263

if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;

2264

return on_success()->EatsAtLeast(still_to_find,

2265

recursion_depth + 1,

2266

not_at_start);

2267

}

2268

2269

2270

int TextNode::EatsAtLeast(int still_to_find,

2271

int recursion_depth,

2272

bool not_at_start) {

2273

int answer = Length();

2274

if (answer >= still_to_find) return answer;

2275

if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer;

2276

// We are not at start after this node so we set the last argument to 'true'.

2277

return answer + on_success()->EatsAtLeast(still_to_find - answer,

2278

recursion_depth + 1,

2279

true);

2280

}

2281

2282

2283

int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find,

2284

int recursion_depth,

2285

bool not_at_start) {

2286

if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;

2287

// Alternative 0 is the negative lookahead, alternative 1 is what comes

2288

// afterwards.

2289

RegExpNode* node = alternatives_->at(1).node();

2290

return node->EatsAtLeast(still_to_find, recursion_depth + 1, not_at_start);

2291

}

2292

2293

2294

void NegativeLookaheadChoiceNode::GetQuickCheckDetails(

2295

QuickCheckDetails* details,

2296

RegExpCompiler* compiler,

2297

int filled_in,

2298

bool not_at_start) {

2299

// Alternative 0 is the negative lookahead, alternative 1 is what comes

2300

// afterwards.

2301

RegExpNode* node = alternatives_->at(1).node();

2302

return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);

2303

}

2304

2305

2306

int ChoiceNode::EatsAtLeastHelper(int still_to_find,

2307

int recursion_depth,

2308

RegExpNode* ignore_this_node,

2309

bool not_at_start) {

2310

if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;

2311

int min = 100;

2312

int choice_count = alternatives_->length();

2313

for (int i = 0; i < choice_count; i++) {

2314

RegExpNode* node = alternatives_->at(i).node();

2315

if (node == ignore_this_node) continue;

2316

int node_eats_at_least = node->EatsAtLeast(still_to_find,

2317

recursion_depth + 1,

2318

not_at_start);

2319

if (node_eats_at_least < min) min = node_eats_at_least;

2320

}

2321

return min;

2322

}

2323

2324

2325

int LoopChoiceNode::EatsAtLeast(int still_to_find,

2326

int recursion_depth,

2327

bool not_at_start) {

2328

return EatsAtLeastHelper(still_to_find,

2329

recursion_depth,

2330

loop_node_,

2331

not_at_start);

2332

}

2333

2334

2335

int ChoiceNode::EatsAtLeast(int still_to_find,

2336

int recursion_depth,

2337

bool not_at_start) {

2338

return EatsAtLeastHelper(still_to_find,

2339

recursion_depth,

2340

NULL,

2341

not_at_start);

2342

}

2343

2344

2345

// Takes the left-most 1-bit and smears it out, setting all bits to its right.

2346

static inline uint32_t SmearBitsRight(uint32_t v) {

2347

v |= v >> 1;

2348

v |= v >> 2;

2349

v |= v >> 4;

2350

v |= v >> 8;

2351

v |= v >> 16;

2352

return v;

2353

}

2354

2355

2356

bool QuickCheckDetails::Rationalize(bool asc) {

2357

bool found_useful_op = false;

2358

uint32_t char_mask;

2359

if (asc) {

2360

char_mask = String::kMaxAsciiCharCode;

2361

} else {

2362

char_mask = String::kMaxUtf16CodeUnit;

2363

}

2364

mask_ = 0;

2365

value_ = 0;

2366

int char_shift = 0;

2367

for (int i = 0; i < characters_; i++) {

2368

Position* pos = &positions_[i];

2369

if ((pos->mask & String::kMaxAsciiCharCode) != 0) {

2370

found_useful_op = true;

2371

}

2372

mask_ |= (pos->mask & char_mask) << char_shift;

2373

value_ |= (pos->value & char_mask) << char_shift;

2374

char_shift += asc ? 8 : 16;

2375

}

2376

return found_useful_op;

2377

}

2378

2379

2380

bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,

2381

Trace* trace,

2382

bool preload_has_checked_bounds,

2383

Label* on_possible_success,

2384

QuickCheckDetails* details,

2385

bool fall_through_on_failure) {

2386

if (details->characters() == 0) return false;

2387

GetQuickCheckDetails(details, compiler, 0, trace->at_start() == Trace::FALSE);

2388

if (details->cannot_match()) return false;

2389

if (!details->Rationalize(compiler->ascii())) return false;

2390

ASSERT(details->characters() == 1 ||

2391

compiler->macro_assembler()->CanReadUnaligned());

2392

uint32_t mask = details->mask();

2393

uint32_t value = details->value();

2394

2395

RegExpMacroAssembler* assembler = compiler->macro_assembler();

2396

2397

if (trace->characters_preloaded() != details->characters()) {

2398

assembler->LoadCurrentCharacter(trace->cp_offset(),

2399

trace->backtrack(),

2400

!preload_has_checked_bounds,

2401

details->characters());

2402

}

2403

2404

2405

bool need_mask = true;

2406

2407

if (details->characters() == 1) {

2408

// If number of characters preloaded is 1 then we used a byte or 16 bit

2409

// load so the value is already masked down.

2410

uint32_t char_mask;

2411

if (compiler->ascii()) {

2412

char_mask = String::kMaxAsciiCharCode;

2413

} else {

2414

char_mask = String::kMaxUtf16CodeUnit;

2415

}

2416

if ((mask & char_mask) == char_mask) need_mask = false;

2417

mask &= char_mask;

2418

} else {

2419

// For 2-character preloads in ASCII mode or 1-character preloads in

2420

// TWO_BYTE mode we also use a 16 bit load with zero extend.

2421

if (details->characters() == 2 && compiler->ascii()) {

2422

if ((mask & 0x7f7f) == 0x7f7f) need_mask = false;

2423

} else if (details->characters() == 1 && !compiler->ascii()) {

2424

if ((mask & 0xffff) == 0xffff) need_mask = false;

2425

} else {

2426

if (mask == 0xffffffff) need_mask = false;

2427

}

2428

}

2429

2430

if (fall_through_on_failure) {

2431

if (need_mask) {

2432

assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);

2433

} else {

2434

assembler->CheckCharacter(value, on_possible_success);

2435

}

2436

} else {

2437

if (need_mask) {

2438

assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());

2439

} else {

2440

assembler->CheckNotCharacter(value, trace->backtrack());

2441

}

2442

}

2443

return true;

2444

}

2445

2446

2447

// Here is the meat of GetQuickCheckDetails (see also the comment on the

2448

// super-class in the .h file).

2449

2450

// We iterate along the text object, building up for each character a

2451

// mask and value that can be used to test for a quick failure to match.

2452

// The masks and values for the positions will be combined into a single

2453

// machine word for the current character width in order to be used in

2454

// generating a quick check.

2455

void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,

2456

RegExpCompiler* compiler,

2457

int characters_filled_in,

2458

bool not_at_start) {

2459

Isolate* isolate = Isolate::Current();

2460

ASSERT(characters_filled_in < details->characters());

2461

int characters = details->characters();

2462

int char_mask;

2463

if (compiler->ascii()) {

2464

char_mask = String::kMaxAsciiCharCode;

2465

} else {

2466

char_mask = String::kMaxUtf16CodeUnit;

2467

}

2468

for (int k = 0; k < elms_->length(); k++) {

2469

TextElement elm = elms_->at(k);

2470

if (elm.type == TextElement::ATOM) {

2471

Vector<const uc16> quarks = elm.data.u_atom->data();

2472

for (int i = 0; i < characters && i < quarks.length(); i++) {

2473

QuickCheckDetails::Position* pos =

2474

details->positions(characters_filled_in);

2475

uc16 c = quarks[i];

2476

if (c > char_mask) {

2477

// If we expect a non-ASCII character from an ASCII string,

2478

// there is no way we can match. Not even case independent

2479

// matching can turn an ASCII character into non-ASCII or

2480

// vice versa.

2481

details->set_cannot_match();

2482

pos->determines_perfectly = false;

2483

return;

2484

}

2485

if (compiler->ignore_case()) {

2486

unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

2487

int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),

2488

chars);

2489

ASSERT(length != 0); // Can only happen if c > char_mask (see above).

2490

if (length == 1) {

2491

// This letter has no case equivalents, so it's nice and simple

2492

// and the mask-compare will determine definitely whether we have

2493

// a match at this character position.

2494

pos->mask = char_mask;

2495

pos->value = c;

2496

pos->determines_perfectly = true;

2497

} else {

2498

uint32_t common_bits = char_mask;

2499

uint32_t bits = chars[0];

2500

for (int j = 1; j < length; j++) {

2501

uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);

2502

common_bits ^= differing_bits;

2503

bits &= common_bits;

2504

}

2505

// If length is 2 and common bits has only one zero in it then

2506

// our mask and compare instruction will determine definitely

2507

// whether we have a match at this character position. Otherwise

2508

// it can only be an approximate check.

2509

uint32_t one_zero = (common_bits | ~char_mask);

2510

if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {

2511

pos->determines_perfectly = true;

2512

}

2513

pos->mask = common_bits;

2514

pos->value = bits;

2515

}

2516

} else {

2517

// Don't ignore case. Nice simple case where the mask-compare will

2518

// determine definitely whether we have a match at this character

2519

// position.

2520

pos->mask = char_mask;

2521

pos->value = c;

2522

pos->determines_perfectly = true;

2523

}

2524

characters_filled_in++;

2525

ASSERT(characters_filled_in <= details->characters());

2526

if (characters_filled_in == details->characters()) {

2527

return;

2528

}

2529

}

2530

} else {

2531

QuickCheckDetails::Position* pos =

2532

details->positions(characters_filled_in);

2533

RegExpCharacterClass* tree = elm.data.u_char_class;

2534

ZoneList<CharacterRange>* ranges = tree->ranges(zone());

2535

if (tree->is_negated()) {

2536

// A quick check uses multi-character mask and compare. There is no

2537

// useful way to incorporate a negative char class into this scheme

2538

// so we just conservatively create a mask and value that will always

2539

// succeed.

2540

pos->mask = 0;

2541

pos->value = 0;

2542

} else {

2543

int first_range = 0;

2544

while (ranges->at(first_range).from() > char_mask) {

2545

first_range++;

2546

if (first_range == ranges->length()) {

2547

details->set_cannot_match();

2548

pos->determines_perfectly = false;

2549

return;

2550

}

2551

}

2552

CharacterRange range = ranges->at(first_range);

2553

uc16 from = range.from();

2554

uc16 to = range.to();

2555

if (to > char_mask) {

2556

to = char_mask;

2557

}

2558

uint32_t differing_bits = (from ^ to);

2559

// A mask and compare is only perfect if the differing bits form a

2560

// number like 00011111 with one single block of trailing 1s.

2561

if ((differing_bits & (differing_bits + 1)) == 0 &&

2562

from + differing_bits == to) {

2563

pos->determines_perfectly = true;

2564

}

2565

uint32_t common_bits = ~SmearBitsRight(differing_bits);

2566

uint32_t bits = (from & common_bits);

2567

for (int i = first_range + 1; i < ranges->length(); i++) {

2568

CharacterRange range = ranges->at(i);

2569

uc16 from = range.from();

2570

uc16 to = range.to();

2571

if (from > char_mask) continue;

2572

if (to > char_mask) to = char_mask;

2573

// Here we are combining more ranges into the mask and compare

2574

// value. With each new range the mask becomes more sparse and

2575

// so the chances of a false positive rise. A character class

2576

// with multiple ranges is assumed never to be equivalent to a

2577

// mask and compare operation.

2578

pos->determines_perfectly = false;

2579

uint32_t new_common_bits = (from ^ to);

2580

new_common_bits = ~SmearBitsRight(new_common_bits);

2581

common_bits &= new_common_bits;

2582

bits &= new_common_bits;

2583

uint32_t differing_bits = (from & common_bits) ^ bits;

2584

common_bits ^= differing_bits;

2585

bits &= common_bits;

2586

}

2587

pos->mask = common_bits;

2588

pos->value = bits;

2589

}

2590

characters_filled_in++;

2591

ASSERT(characters_filled_in <= details->characters());

2592

if (characters_filled_in == details->characters()) {

2593

return;

2594

}

2595

}

2596

}

2597

ASSERT(characters_filled_in != details->characters());

2598

if (!details->cannot_match()) {

2599

on_success()-> GetQuickCheckDetails(details,

2600

compiler,

2601

characters_filled_in,

2602

true);

2603

}

2604

}

2605

2606

2607

void QuickCheckDetails::Clear() {

2608

for (int i = 0; i < characters_; i++) {

2609

positions_[i].mask = 0;

2610

positions_[i].value = 0;

2611

positions_[i].determines_perfectly = false;

2612

}

2613

characters_ = 0;

2614

}

2615

2616

2617

void QuickCheckDetails::Advance(int by, bool ascii) {

2618

ASSERT(by >= 0);

2619

if (by >= characters_) {

2620

Clear();

2621

return;

2622

}

2623

for (int i = 0; i < characters_ - by; i++) {

2624

positions_[i] = positions_[by + i];

2625

}

2626

for (int i = characters_ - by; i < characters_; i++) {

2627

positions_[i].mask = 0;

2628

positions_[i].value = 0;

2629

positions_[i].determines_perfectly = false;

2630

}

2631

characters_ -= by;

2632

// We could change mask_ and value_ here but we would never advance unless

2633

// they had already been used in a check and they won't be used again because

2634

// it would gain us nothing. So there's no point.

2635

}

2636

2637

2638

void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {

2639

ASSERT(characters_ == other->characters_);

2640

if (other->cannot_match_) {

2641

return;

2642

}

2643

if (cannot_match_) {

2644

*this = *other;

2645

return;

2646

}

2647

for (int i = from_index; i < characters_; i++) {

2648

QuickCheckDetails::Position* pos = positions(i);

2649

QuickCheckDetails::Position* other_pos = other->positions(i);

2650

if (pos->mask != other_pos->mask ||

2651

pos->value != other_pos->value ||

2652

!other_pos->determines_perfectly) {

2653

// Our mask-compare operation will be approximate unless we have the

2654

// exact same operation on both sides of the alternation.

2655

pos->determines_perfectly = false;

2656

}

2657

pos->mask &= other_pos->mask;

2658

pos->value &= pos->mask;

2659

other_pos->value &= pos->mask;

2660

uc16 differing_bits = (pos->value ^ other_pos->value);

2661

pos->mask &= ~differing_bits;

2662

pos->value &= pos->mask;

2663

}

2664

}

2665

2666

2667

class VisitMarker {

2668

public:

2669

explicit VisitMarker(NodeInfo* info) : info_(info) {

2670

ASSERT(!info->visited);

2671

info->visited = true;

2672

}

2673

~VisitMarker() {

2674

info_->visited = false;

2675

}

2676

private:

2677

NodeInfo* info_;

2678

};

2679

2680

2681

RegExpNode* SeqRegExpNode::FilterASCII(int depth) {

2682

if (info()->replacement_calculated) return replacement();

2683

if (depth < 0) return this;

2684

ASSERT(!info()->visited);

2685

VisitMarker marker(info());

2686

return FilterSuccessor(depth - 1);

2687

}

2688

2689

2690

RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) {

2691

RegExpNode* next = on_success_->FilterASCII(depth - 1);

2692

if (next == NULL) return set_replacement(NULL);

2693

on_success_ = next;

2694

return set_replacement(this);

2695

}

2696

2697

2698

RegExpNode* TextNode::FilterASCII(int depth) {

2699

if (info()->replacement_calculated) return replacement();

2700

if (depth < 0) return this;

2701

ASSERT(!info()->visited);

2702

VisitMarker marker(info());

2703

int element_count = elms_->length();

2704

for (int i = 0; i < element_count; i++) {

2705

TextElement elm = elms_->at(i);

2706

if (elm.type == TextElement::ATOM) {

2707

Vector<const uc16> quarks = elm.data.u_atom->data();

2708

for (int j = 0; j < quarks.length(); j++) {

2709

// We don't need special handling for case independence

2710

// because of the rule that case independence cannot make

2711

// a non-ASCII character match an ASCII character.

2712

if (quarks[j] > String::kMaxAsciiCharCode) {

2713

return set_replacement(NULL);

2714

}

2715

}

2716

} else {

2717

ASSERT(elm.type == TextElement::CHAR_CLASS);

2718

RegExpCharacterClass* cc = elm.data.u_char_class;

2719

ZoneList<CharacterRange>* ranges = cc->ranges(zone());

2720

if (!CharacterRange::IsCanonical(ranges)) {

2721

CharacterRange::Canonicalize(ranges);

2722

}

2723

// Now they are in order so we only need to look at the first.

2724

int range_count = ranges->length();

2725

if (cc->is_negated()) {

2726

if (range_count != 0 &&

2727

ranges->at(0).from() == 0 &&

2728

ranges->at(0).to() >= String::kMaxAsciiCharCode) {

2729

return set_replacement(NULL);

2730

}

2731

} else {

2732

if (range_count == 0 ||

2733

ranges->at(0).from() > String::kMaxAsciiCharCode) {

2734

return set_replacement(NULL);

2735

}

2736

}

2737

}

2738

}

2739

return FilterSuccessor(depth - 1);

2740

}

2741

2742

2743

RegExpNode* LoopChoiceNode::FilterASCII(int depth) {

2744

if (info()->replacement_calculated) return replacement();

2745

if (depth < 0) return this;

2746

if (info()->visited) return this;

2747

{

2748

VisitMarker marker(info());

2749

2750

RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1);

2751

// If we can't continue after the loop then there is no sense in doing the

2752

// loop.

2753

if (continue_replacement == NULL) return set_replacement(NULL);

2754

}

2755

2756

return ChoiceNode::FilterASCII(depth - 1);

2757

}

2758

2759

2760

RegExpNode* ChoiceNode::FilterASCII(int depth) {

2761

if (info()->replacement_calculated) return replacement();

2762

if (depth < 0) return this;

2763

if (info()->visited) return this;

2764

VisitMarker marker(info());

2765

int choice_count = alternatives_->length();

2766

2767

for (int i = 0; i < choice_count; i++) {

2768

GuardedAlternative alternative = alternatives_->at(i);

2769

if (alternative.guards() != NULL && alternative.guards()->length() != 0) {

2770

set_replacement(this);

2771

return this;

2772

}

2773

}

2774

2775

int surviving = 0;

2776

RegExpNode* survivor = NULL;

2777

for (int i = 0; i < choice_count; i++) {

2778

GuardedAlternative alternative = alternatives_->at(i);

2779

RegExpNode* replacement = alternative.node()->FilterASCII(depth - 1);

2780

ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK.

2781

if (replacement != NULL) {

2782

alternatives_->at(i).set_node(replacement);

2783

surviving++;

2784

survivor = replacement;

2785

}

2786

}

2787

if (surviving < 2) return set_replacement(survivor);

2788

2789

set_replacement(this);

2790

if (surviving == choice_count) {

2791

return this;

2792

}

2793

// Only some of the nodes survived the filtering. We need to rebuild the

2794

// alternatives list.

2795

ZoneList<GuardedAlternative>* new_alternatives =

2796

new(zone()) ZoneList<GuardedAlternative>(surviving, zone());

2797

for (int i = 0; i < choice_count; i++) {

2798

RegExpNode* replacement =

2799

alternatives_->at(i).node()->FilterASCII(depth - 1);

2800

if (replacement != NULL) {

2801

alternatives_->at(i).set_node(replacement);

2802

new_alternatives->Add(alternatives_->at(i), zone());

2803

}

2804

}

2805

alternatives_ = new_alternatives;

2806

return this;

2807

}

2808

2809

2810

RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth) {

2811

if (info()->replacement_calculated) return replacement();

2812

if (depth < 0) return this;

2813

if (info()->visited) return this;

2814

VisitMarker marker(info());

2815

// Alternative 0 is the negative lookahead, alternative 1 is what comes

2816

// afterwards.

2817

RegExpNode* node = alternatives_->at(1).node();

2818

RegExpNode* replacement = node->FilterASCII(depth - 1);

2819

if (replacement == NULL) return set_replacement(NULL);

2820

alternatives_->at(1).set_node(replacement);

2821

2822

RegExpNode* neg_node = alternatives_->at(0).node();

2823

RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1);

2824

// If the negative lookahead is always going to fail then

2825

// we don't need to check it.

2826

if (neg_replacement == NULL) return set_replacement(replacement);

2827

alternatives_->at(0).set_node(neg_replacement);

2828

return set_replacement(this);

2829

}

2830

2831

2832

void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,

2833

RegExpCompiler* compiler,

2834

int characters_filled_in,

2835

bool not_at_start) {

2836

if (body_can_be_zero_length_ || info()->visited) return;

2837

VisitMarker marker(info());

2838

return ChoiceNode::GetQuickCheckDetails(details,

2839

compiler,

2840

characters_filled_in,

2841

not_at_start);

2842

}

2843

2844

2845

void LoopChoiceNode::FillInBMInfo(int offset,

2846

int recursion_depth,

2847

int budget,

2848

BoyerMooreLookahead* bm,

2849

bool not_at_start) {

2850

if (body_can_be_zero_length_ ||

2851

recursion_depth > RegExpCompiler::kMaxRecursion ||

2852

budget <= 0) {

2853

bm->SetRest(offset);

2854

SaveBMInfo(bm, not_at_start, offset);

2855

return;

2856

}

2857

ChoiceNode::FillInBMInfo(

2858

offset, recursion_depth + 1, budget - 1, bm, not_at_start);

2859

SaveBMInfo(bm, not_at_start, offset);

2860

}

2861

2862

2863

void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,

2864

RegExpCompiler* compiler,

2865

int characters_filled_in,

2866

bool not_at_start) {

2867

not_at_start = (not_at_start || not_at_start_);

2868

int choice_count = alternatives_->length();

2869

ASSERT(choice_count > 0);

2870

alternatives_->at(0).node()->GetQuickCheckDetails(details,

2871

compiler,

2872

characters_filled_in,

2873

not_at_start);

2874

for (int i = 1; i < choice_count; i++) {

2875

QuickCheckDetails new_details(details->characters());

2876

RegExpNode* node = alternatives_->at(i).node();

2877

node->GetQuickCheckDetails(&new_details, compiler,

2878

characters_filled_in,

2879

not_at_start);

2880

// Here we merge the quick match details of the two branches.

2881

details->Merge(&new_details, characters_filled_in);

2882

}

2883

}

2884

2885

2886

// Check for [0-9A-Z_a-z].

2887

static void EmitWordCheck(RegExpMacroAssembler* assembler,

2888

Label* word,

2889

Label* non_word,

2890

bool fall_through_on_word) {

2891

if (assembler->CheckSpecialCharacterClass(

2892

fall_through_on_word ? 'w' : 'W',

2893

fall_through_on_word ? non_word : word)) {

2894

// Optimized implementation available.

2895

return;

2896

}

2897

assembler->CheckCharacterGT('z', non_word);

2898

assembler->CheckCharacterLT('0', non_word);

2899

assembler->CheckCharacterGT('a' - 1, word);

2900

assembler->CheckCharacterLT('9' + 1, word);

2901

assembler->CheckCharacterLT('A', non_word);

2902

assembler->CheckCharacterLT('Z' + 1, word);

2903

if (fall_through_on_word) {

2904

assembler->CheckNotCharacter('_', non_word);

2905

} else {

2906

assembler->CheckCharacter('_', word);

2907

}

2908

}

2909

2910

2911

// Emit the code to check for a ^ in multiline mode (1-character lookbehind

2912

// that matches newline or the start of input).

2913

static void EmitHat(RegExpCompiler* compiler,

2914

RegExpNode* on_success,

2915

Trace* trace) {

2916

RegExpMacroAssembler* assembler = compiler->macro_assembler();

2917

// We will be loading the previous character into the current character

2918

// register.

2919

Trace new_trace(*trace);

2920

new_trace.InvalidateCurrentCharacter();

2921

2922

Label ok;

2923

if (new_trace.cp_offset() == 0) {

2924

// The start of input counts as a newline in this context, so skip to

2925

// ok if we are at the start.

2926

assembler->CheckAtStart(&ok);

2927

}

2928

// We already checked that we are not at the start of input so it must be

2929

// OK to load the previous character.

2930

assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,

2931

new_trace.backtrack(),

2932

false);

2933

if (!assembler->CheckSpecialCharacterClass('n',

2934

new_trace.backtrack())) {

2935

// Newline means \n, \r, 0x2028 or 0x2029.

2936

if (!compiler->ascii()) {

2937

assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);

2938

}

2939

assembler->CheckCharacter('\n', &ok);

2940

assembler->CheckNotCharacter('\r', new_trace.backtrack());

2941

}

2942

assembler->Bind(&ok);

2943

on_success->Emit(compiler, &new_trace);

2944

}

2945

2946

2947

// Emit the code to handle \b and \B (word-boundary or non-word-boundary).

2948

void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {

2949

RegExpMacroAssembler* assembler = compiler->macro_assembler();

2950

Trace::TriBool next_is_word_character = Trace::UNKNOWN;

2951

bool not_at_start = (trace->at_start() == Trace::FALSE);

2952

BoyerMooreLookahead* lookahead = bm_info(not_at_start);

2953

if (lookahead == NULL) {

2954

int eats_at_least =

2955

Min(kMaxLookaheadForBoyerMoore,

2956

EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));

2957

if (eats_at_least >= 1) {

2958

BoyerMooreLookahead* bm =

2959

new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone());

2960

FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);

2961

if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;

2962

if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE;

2963

}

2964

} else {

2965

if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;

2966

if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE;

2967

}

2968

bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY);

2969

if (next_is_word_character == Trace::UNKNOWN) {

2970

Label before_non_word;

2971

Label before_word;

2972

if (trace->characters_preloaded() != 1) {

2973

assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);

2974

}

2975

// Fall through on non-word.

2976

EmitWordCheck(assembler, &before_word, &before_non_word, false);

2977

// Next character is not a word character.

2978

assembler->Bind(&before_non_word);

2979

Label ok;

2980

BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);

2981

assembler->GoTo(&ok);

2982

2983

assembler->Bind(&before_word);

2984

BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);

2985

assembler->Bind(&ok);

2986

} else if (next_is_word_character == Trace::TRUE) {

2987

BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);

2988

} else {

2989

ASSERT(next_is_word_character == Trace::FALSE);

2990

BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);

2991

}

2992

}

2993

2994

2995

void AssertionNode::BacktrackIfPrevious(

2996

RegExpCompiler* compiler,

2997

Trace* trace,

2998

AssertionNode::IfPrevious backtrack_if_previous) {

2999

RegExpMacroAssembler* assembler = compiler->macro_assembler();

3000

Trace new_trace(*trace);

3001

new_trace.InvalidateCurrentCharacter();

3002

3003

Label fall_through, dummy;

3004

3005

Label* non_word = backtrack_if_previous == kIsNonWord ?

3006

new_trace.backtrack() :

3007

&fall_through;

3008

Label* word = backtrack_if_previous == kIsNonWord ?

3009

&fall_through :

3010

new_trace.backtrack();

3011

3012

if (new_trace.cp_offset() == 0) {

3013

// The start of input counts as a non-word character, so the question is

3014

// decided if we are at the start.

3015

assembler->CheckAtStart(non_word);

3016

}

3017

// We already checked that we are not at the start of input so it must be

3018

// OK to load the previous character.

3019

assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);

3020

EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);

3021

3022

assembler->Bind(&fall_through);

3023

on_success()->Emit(compiler, &new_trace);

3024

}

3025

3026

3027

void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,

3028

RegExpCompiler* compiler,

3029

int filled_in,

3030

bool not_at_start) {

3031

if (type_ == AT_START && not_at_start) {

3032

details->set_cannot_match();

3033

return;

3034

}

3035

return on_success()->GetQuickCheckDetails(details,

3036

compiler,

3037

filled_in,

3038

not_at_start);

3039

}

3040

3041

3042

void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {

3043

RegExpMacroAssembler* assembler = compiler->macro_assembler();

3044

switch (type_) {

3045

case AT_END: {

3046

Label ok;

3047

assembler->CheckPosition(trace->cp_offset(), &ok);

3048

assembler->GoTo(trace->backtrack());

3049

assembler->Bind(&ok);

3050

break;

3051

}

3052

case AT_START: {

3053

if (trace->at_start() == Trace::FALSE) {

3054

assembler->GoTo(trace->backtrack());

3055

return;

3056

}

3057

if (trace->at_start() == Trace::UNKNOWN) {

3058

assembler->CheckNotAtStart(trace->backtrack());

3059

Trace at_start_trace = *trace;

3060

at_start_trace.set_at_start(true);

3061

on_success()->Emit(compiler, &at_start_trace);

3062

return;

3063

}

3064

}

3065

break;

3066

case AFTER_NEWLINE:

3067

EmitHat(compiler, on_success(), trace);

3068

return;

3069

case AT_BOUNDARY:

3070

case AT_NON_BOUNDARY: {

3071

EmitBoundaryCheck(compiler, trace);

3072

return;

3073

}

3074

}

3075

on_success()->Emit(compiler, trace);

3076

}

3077

3078

3079

static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {

3080

if (quick_check == NULL) return false;

3081

if (offset >= quick_check->characters()) return false;

3082

return quick_check->positions(offset)->determines_perfectly;

3083

}

3084

3085

3086

static void UpdateBoundsCheck(int index, int* checked_up_to) {

3087

if (index > *checked_up_to) {

3088

*checked_up_to = index;

3089

}

3090

}

3091

3092

3093

// We call this repeatedly to generate code for each pass over the text node.

3094

// The passes are in increasing order of difficulty because we hope one

3095

// of the first passes will fail in which case we are saved the work of the

3096

// later passes. for example for the case independent regexp /%[asdfghjkl]a/

3097

// we will check the '%' in the first pass, the case independent 'a' in the

3098

// second pass and the character class in the last pass.

3099

3100

// The passes are done from right to left, so for example to test for /bar/

3101

// we will first test for an 'r' with offset 2, then an 'a' with offset 1

3102

// and then a 'b' with offset 0. This means we can avoid the end-of-input

3103

// bounds check most of the time. In the example we only need to check for

3104

// end-of-input when loading the putative 'r'.

3105

3106

// A slight complication involves the fact that the first character may already

3107

// be fetched into a register by the previous node. In this case we want to

3108

// do the test for that character first. We do this in separate passes. The

3109

// 'preloaded' argument indicates that we are doing such a 'pass'. If such a

3110

// pass has been performed then subsequent passes will have true in

3111

// first_element_checked to indicate that that character does not need to be

3112

// checked again.

3113

3114

// In addition to all this we are passed a Trace, which can

3115

// contain an AlternativeGeneration object. In this AlternativeGeneration

3116

// object we can see details of any quick check that was already passed in

3117

// order to get to the code we are now generating. The quick check can involve

3118

// loading characters, which means we do not need to recheck the bounds

3119

// up to the limit the quick check already checked. In addition the quick

3120

// check can have involved a mask and compare operation which may simplify

3121

// or obviate the need for further checks at some character positions.

3122

void TextNode::TextEmitPass(RegExpCompiler* compiler,

3123

TextEmitPassType pass,

3124

bool preloaded,

3125

Trace* trace,

3126

bool first_element_checked,

3127

int* checked_up_to) {

3128

Isolate* isolate = Isolate::Current();

3129

RegExpMacroAssembler* assembler = compiler->macro_assembler();

3130

bool ascii = compiler->ascii();

3131

Label* backtrack = trace->backtrack();

3132

QuickCheckDetails* quick_check = trace->quick_check_performed();

3133

int element_count = elms_->length();

3134

for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {

3135

TextElement elm = elms_->at(i);

3136

int cp_offset = trace->cp_offset() + elm.cp_offset;

3137

if (elm.type == TextElement::ATOM) {

3138

Vector<const uc16> quarks = elm.data.u_atom->data();

3139

for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {

3140

if (first_element_checked && i == 0 && j == 0) continue;

3141

if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue;

3142

EmitCharacterFunction* emit_function = NULL;

3143

switch (pass) {

3144

case NON_ASCII_MATCH:

3145

ASSERT(ascii);

3146

if (quarks[j] > String::kMaxAsciiCharCode) {

3147

assembler->GoTo(backtrack);

3148

return;

3149

}

3150

break;

3151

case NON_LETTER_CHARACTER_MATCH:

3152

emit_function = &EmitAtomNonLetter;

3153

break;

3154

case SIMPLE_CHARACTER_MATCH:

3155

emit_function = &EmitSimpleCharacter;

3156

break;

3157

case CASE_CHARACTER_MATCH:

3158

emit_function = &EmitAtomLetter;

3159

break;

3160

default:

3161

break;

3162

}

3163

if (emit_function != NULL) {

3164

bool bound_checked = emit_function(isolate,

3165

compiler,

3166

quarks[j],

3167

backtrack,

3168

cp_offset + j,

3169

*checked_up_to < cp_offset + j,

3170

preloaded);

3171

if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);

3172

}

3173

}

3174

} else {

3175

ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);

3176

if (pass == CHARACTER_CLASS_MATCH) {

3177

if (first_element_checked && i == 0) continue;

3178

if (DeterminedAlready(quick_check, elm.cp_offset)) continue;

3179

RegExpCharacterClass* cc = elm.data.u_char_class;

3180

EmitCharClass(assembler,

3181

cc,

3182

ascii,

3183

backtrack,

3184

cp_offset,

3185

*checked_up_to < cp_offset,

3186

preloaded,

3187

zone());

3188

UpdateBoundsCheck(cp_offset, checked_up_to);

3189

}

3190

}

3191

}

3192

}

3193

3194

3195

int TextNode::Length() {

3196

TextElement elm = elms_->last();

3197

ASSERT(elm.cp_offset >= 0);

3198

if (elm.type == TextElement::ATOM) {

3199

return elm.cp_offset + elm.data.u_atom->data().length();

3200

} else {

3201

return elm.cp_offset + 1;

3202

}

3203

}

3204

3205

3206

bool TextNode::SkipPass(int int_pass, bool ignore_case) {

3207

TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);

3208

if (ignore_case) {

3209

return pass == SIMPLE_CHARACTER_MATCH;

3210

} else {

3211

return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;

3212

}

3213

}

3214

3215

3216

// This generates the code to match a text node. A text node can contain

3217

// straight character sequences (possibly to be matched in a case-independent

3218

// way) and character classes. For efficiency we do not do this in a single

3219

// pass from left to right. Instead we pass over the text node several times,

3220

// emitting code for some character positions every time. See the comment on

3221

// TextEmitPass for details.

3222

void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {

3223

LimitResult limit_result = LimitVersions(compiler, trace);

3224

if (limit_result == DONE) return;

3225

ASSERT(limit_result == CONTINUE);

3226

3227

if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {

3228

compiler->SetRegExpTooBig();

3229

return;

3230

}

3231

3232

if (compiler->ascii()) {

3233

int dummy = 0;

3234

TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);

3235

}

3236

3237

bool first_elt_done = false;

3238

int bound_checked_to = trace->cp_offset() - 1;

3239

bound_checked_to += trace->bound_checked_up_to();

3240

3241

// If a character is preloaded into the current character register then

3242

// check that now.

3243

if (trace->characters_preloaded() == 1) {

3244

for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {

3245

if (!SkipPass(pass, compiler->ignore_case())) {

3246

TextEmitPass(compiler,

3247

static_cast<TextEmitPassType>(pass),

3248

true,

3249

trace,

3250

false,

3251

&bound_checked_to);

3252

}

3253

}

3254

first_elt_done = true;

3255

}

3256

3257

for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {

3258

if (!SkipPass(pass, compiler->ignore_case())) {

3259

TextEmitPass(compiler,

3260

static_cast<TextEmitPassType>(pass),

3261

false,

3262

trace,

3263

first_elt_done,

3264

&bound_checked_to);

3265

}

3266

}

3267

3268

Trace successor_trace(*trace);

3269

successor_trace.set_at_start(false);

3270

successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);

3271

RecursionCheck rc(compiler);

3272

on_success()->Emit(compiler, &successor_trace);

3273

}

3274

3275

3276

void Trace::InvalidateCurrentCharacter() {

3277

characters_preloaded_ = 0;

3278

}

3279

3280

3281

void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {

3282

ASSERT(by > 0);

3283

// We don't have an instruction for shifting the current character register

3284

// down or for using a shifted value for anything so lets just forget that

3285

// we preloaded any characters into it.

3286

characters_preloaded_ = 0;

3287

// Adjust the offsets of the quick check performed information. This

3288

// information is used to find out what we already determined about the

3289

// characters by means of mask and compare.

3290

quick_check_performed_.Advance(by, compiler->ascii());

3291

cp_offset_ += by;

3292

if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {

3293

compiler->SetRegExpTooBig();

3294

cp_offset_ = 0;

3295

}

3296

bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);

3297

}

3298

3299

3300

void TextNode::MakeCaseIndependent(bool is_ascii) {

3301

int element_count = elms_->length();

3302

for (int i = 0; i < element_count; i++) {

3303

TextElement elm = elms_->at(i);

3304

if (elm.type == TextElement::CHAR_CLASS) {

3305

RegExpCharacterClass* cc = elm.data.u_char_class;

3306

// None of the standard character classes is different in the case

3307

// independent case and it slows us down if we don't know that.

3308

if (cc->is_standard(zone())) continue;

3309

ZoneList<CharacterRange>* ranges = cc->ranges(zone());

3310

int range_count = ranges->length();

3311

for (int j = 0; j < range_count; j++) {

3312

ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone());

3313

}

3314

}

3315

}

3316

}

3317

3318

3319

int TextNode::GreedyLoopTextLength() {

3320

TextElement elm = elms_->at(elms_->length() - 1);

3321

if (elm.type == TextElement::CHAR_CLASS) {

3322

return elm.cp_offset + 1;

3323

} else {

3324

return elm.cp_offset + elm.data.u_atom->data().length();

3325

}

3326

}

3327

3328

3329

RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(

3330

RegExpCompiler* compiler) {

3331

if (elms_->length() != 1) return NULL;

3332

TextElement elm = elms_->at(0);

3333

if (elm.type != TextElement::CHAR_CLASS) return NULL;

3334

RegExpCharacterClass* node = elm.data.u_char_class;

3335

ZoneList<CharacterRange>* ranges = node->ranges(zone());

3336

if (!CharacterRange::IsCanonical(ranges)) {

3337

CharacterRange::Canonicalize(ranges);

3338

}

3339

if (node->is_negated()) {

3340

return ranges->length() == 0 ? on_success() : NULL;

3341

}

3342

if (ranges->length() != 1) return NULL;

3343

uint32_t max_char;

3344

if (compiler->ascii()) {

3345

max_char = String::kMaxAsciiCharCode;

3346

} else {

3347

max_char = String::kMaxUtf16CodeUnit;

3348

}

3349

return ranges->at(0).IsEverything(max_char) ? on_success() : NULL;

3350

}

3351

3352

3353

// Finds the fixed match length of a sequence of nodes that goes from

3354

// this alternative and back to this choice node. If there are variable

3355

// length nodes or other complications in the way then return a sentinel

3356

// value indicating that a greedy loop cannot be constructed.

3357

int ChoiceNode::GreedyLoopTextLengthForAlternative(

3358

GuardedAlternative* alternative) {

3359

int length = 0;

3360

RegExpNode* node = alternative->node();

3361

// Later we will generate code for all these text nodes using recursion

3362

// so we have to limit the max number.

3363

int recursion_depth = 0;

3364

while (node != this) {

3365

if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {

3366

return kNodeIsTooComplexForGreedyLoops;

3367

}

3368

int node_length = node->GreedyLoopTextLength();

3369

if (node_length == kNodeIsTooComplexForGreedyLoops) {

3370

return kNodeIsTooComplexForGreedyLoops;

3371

}

3372

length += node_length;

3373

SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);

3374

node = seq_node->on_success();

3375

}

3376

return length;

3377

}

3378

3379

3380

void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {

3381

ASSERT_EQ(loop_node_, NULL);

3382

AddAlternative(alt);

3383

loop_node_ = alt.node();

3384

}

3385

3386

3387

void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {

3388

ASSERT_EQ(continue_node_, NULL);

3389

AddAlternative(alt);

3390

continue_node_ = alt.node();

3391

}

3392

3393

3394

void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

3395

RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

3396

if (trace->stop_node() == this) {

3397

int text_length =

3398

GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));

3399

ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);

3400

// Update the counter-based backtracking info on the stack. This is an

3401

// optimization for greedy loops (see below).

3402

ASSERT(trace->cp_offset() == text_length);

3403

macro_assembler->AdvanceCurrentPosition(text_length);

3404

macro_assembler->GoTo(trace->loop_label());

3405

return;

3406

}

3407

ASSERT(trace->stop_node() == NULL);

3408

if (!trace->is_trivial()) {

3409

trace->Flush(compiler, this);

3410

return;

3411

}

3412

ChoiceNode::Emit(compiler, trace);

3413

}

3414

3415

3416

int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,

3417

int eats_at_least) {

3418

int preload_characters = Min(4, eats_at_least);

3419

if (compiler->macro_assembler()->CanReadUnaligned()) {

3420

bool ascii = compiler->ascii();

3421

if (ascii) {

3422

if (preload_characters > 4) preload_characters = 4;

3423

// We can't preload 3 characters because there is no machine instruction

3424

// to do that. We can't just load 4 because we could be reading

3425

// beyond the end of the string, which could cause a memory fault.

3426

if (preload_characters == 3) preload_characters = 2;

3427

} else {

3428

if (preload_characters > 2) preload_characters = 2;

3429

}

3430

} else {

3431

if (preload_characters > 1) preload_characters = 1;

3432

}

3433

return preload_characters;

3434

}

3435

3436

3437

// This class is used when generating the alternatives in a choice node. It

3438

// records the way the alternative is being code generated.

3439

class AlternativeGeneration: public Malloced {

3440

public:

3441

AlternativeGeneration()

3442

: possible_success(),

3443

expects_preload(false),

3444

after(),

3445

quick_check_details() { }

3446

Label possible_success;

3447

bool expects_preload;

3448

Label after;

3449

QuickCheckDetails quick_check_details;

3450

};

3451

3452

3453

// Creates a list of AlternativeGenerations. If the list has a reasonable

3454

// size then it is on the stack, otherwise the excess is on the heap.

3455

class AlternativeGenerationList {

3456

public:

3457

AlternativeGenerationList(int count, Zone* zone)

3458

: alt_gens_(count, zone) {

3459

for (int i = 0; i < count && i < kAFew; i++) {

3460

alt_gens_.Add(a_few_alt_gens_ + i, zone);

3461

}

3462

for (int i = kAFew; i < count; i++) {

3463

alt_gens_.Add(new AlternativeGeneration(), zone);

3464

}

3465

}

3466

~AlternativeGenerationList() {

3467

for (int i = kAFew; i < alt_gens_.length(); i++) {

3468

delete alt_gens_[i];

3469

alt_gens_[i] = NULL;

3470

}

3471

}

3472

3473

AlternativeGeneration* at(int i) {

3474

return alt_gens_[i];

3475

}

3476

3477

private:

3478

static const int kAFew = 10;

3479

ZoneList<AlternativeGeneration*> alt_gens_;

3480

AlternativeGeneration a_few_alt_gens_[kAFew];

3481

};

3482

3483

3484

// The '2' variant is has inclusive from and exclusive to.

3485

static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0,

3486

0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A,

3487

0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000 };

3488

static const int kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);

3489

3490

static const int kWordRanges[] = {

3491

'0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };

3492

static const int kWordRangeCount = ARRAY_SIZE(kWordRanges);

3493

static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 };

3494

static const int kDigitRangeCount = ARRAY_SIZE(kDigitRanges);

3495

static const int kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };

3496

static const int kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges);

3497

static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E,

3498

0x2028, 0x202A, 0x10000 };

3499

static const int kLineTerminatorRangeCount = ARRAY_SIZE(kLineTerminatorRanges);

3500

3501

3502

void BoyerMoorePositionInfo::Set(int character) {

3503

SetInterval(Interval(character, character));

3504

}

3505

3506

3507

void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {

3508

s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);

3509

w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);

3510

d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);

3511

surrogate_ =

3512

AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);

3513

if (interval.to() - interval.from() >= kMapSize - 1) {

3514

if (map_count_ != kMapSize) {

3515

map_count_ = kMapSize;

3516

for (int i = 0; i < kMapSize; i++) map_->at(i) = true;

3517

}

3518

return;

3519

}

3520

for (int i = interval.from(); i <= interval.to(); i++) {

3521

int mod_character = (i & kMask);

3522

if (!map_->at(mod_character)) {

3523

map_count_++;

3524

map_->at(mod_character) = true;

3525

}

3526

if (map_count_ == kMapSize) return;

3527

}

3528

}

3529

3530

3531

void BoyerMoorePositionInfo::SetAll() {

3532

s_ = w_ = d_ = kLatticeUnknown;

3533

if (map_count_ != kMapSize) {

3534

map_count_ = kMapSize;

3535

for (int i = 0; i < kMapSize; i++) map_->at(i) = true;

3536

}

3537

}

3538

3539

3540

BoyerMooreLookahead::BoyerMooreLookahead(

3541

int length, RegExpCompiler* compiler, Zone* zone)

3542

: length_(length),

3543

compiler_(compiler) {

3544

if (compiler->ascii()) {

3545

max_char_ = String::kMaxAsciiCharCode;

3546

} else {

3547

max_char_ = String::kMaxUtf16CodeUnit;

3548

}

3549

bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);

3550

for (int i = 0; i < length; i++) {

3551

bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone);

3552

}

3553

}

3554

3555

3556

// Find the longest range of lookahead that has the fewest number of different

3557

// characters that can occur at a given position. Since we are optimizing two

3558

// different parameters at once this is a tradeoff.

3559

bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {

3560

int biggest_points = 0;

3561

// If more than 32 characters out of 128 can occur it is unlikely that we can

3562

// be lucky enough to step forwards much of the time.

3563

const int kMaxMax = 32;

3564

for (int max_number_of_chars = 4;

3565

max_number_of_chars < kMaxMax;

3566

max_number_of_chars *= 2) {

3567

biggest_points =

3568

FindBestInterval(max_number_of_chars, biggest_points, from, to);

3569

}

3570

if (biggest_points == 0) return false;

3571

return true;

3572

}

3573

3574

3575

// Find the highest-points range between 0 and length_ where the character

3576

// information is not too vague. 'Too vague' means that there are more than

3577

// max_number_of_chars that can occur at this position. Calculates the number

3578

// of points as the product of width-of-the-range and

3579

// probability-of-finding-one-of-the-characters, where the probability is

3580

// calculated using the frequency distribution of the sample subject string.

3581

int BoyerMooreLookahead::FindBestInterval(

3582

int max_number_of_chars, int old_biggest_points, int* from, int* to) {

3583

int biggest_points = old_biggest_points;

3584

static const int kSize = RegExpMacroAssembler::kTableSize;

3585

for (int i = 0; i < length_; ) {

3586

while (i < length_ && Count(i) > max_number_of_chars) i++;

3587

if (i == length_) break;

3588

int remembered_from = i;

3589

bool union_map[kSize];

3590

for (int j = 0; j < kSize; j++) union_map[j] = false;

3591

while (i < length_ && Count(i) <= max_number_of_chars) {

3592

BoyerMoorePositionInfo* map = bitmaps_->at(i);

3593

for (int j = 0; j < kSize; j++) union_map[j] |= map->at(j);

3594

i++;

3595

}

3596

int frequency = 0;

3597

for (int j = 0; j < kSize; j++) {

3598

if (union_map[j]) {

3599

// Add 1 to the frequency to give a small per-character boost for

3600

// the cases where our sampling is not good enough and many

3601

// characters have a frequency of zero. This means the frequency

3602

// can theoretically be up to 2*kSize though we treat it mostly as

3603

// a fraction of kSize.

3604

frequency += compiler_->frequency_collator()->Frequency(j) + 1;

3605

}

3606

}

3607

// We use the probability of skipping times the distance we are skipping to

3608

// judge the effectiveness of this. Actually we have a cut-off: By

3609

// dividing by 2 we switch off the skipping if the probability of skipping

3610

// is less than 50%. This is because the multibyte mask-and-compare

3611

// skipping in quickcheck is more likely to do well on this case.

3612

bool in_quickcheck_range = ((i - remembered_from < 4) ||

3613

(compiler_->ascii() ? remembered_from <= 4 : remembered_from <= 2));

3614

// Called 'probability' but it is only a rough estimate and can actually

3615

// be outside the 0-kSize range.

3616

int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;

3617

int points = (i - remembered_from) * probability;

3618

if (points > biggest_points) {

3619

*from = remembered_from;

3620

*to = i - 1;

3621

biggest_points = points;

3622

}

3623

}

3624

return biggest_points;

3625

}

3626

3627

3628

// Take all the characters that will not prevent a successful match if they

3629

// occur in the subject string in the range between min_lookahead and

3630

// max_lookahead (inclusive) measured from the current position. If the

3631

// character at max_lookahead offset is not one of these characters, then we

3632

// can safely skip forwards by the number of characters in the range.

3633

int BoyerMooreLookahead::GetSkipTable(int min_lookahead,

3634

int max_lookahead,

3635

Handle<ByteArray> boolean_skip_table) {

3636

const int kSize = RegExpMacroAssembler::kTableSize;

3637

3638

const int kSkipArrayEntry = 0;

3639

const int kDontSkipArrayEntry = 1;

3640

3641

for (int i = 0; i < kSize; i++) {

3642

boolean_skip_table->set(i, kSkipArrayEntry);

3643

}

3644

int skip = max_lookahead + 1 - min_lookahead;

3645

3646

for (int i = max_lookahead; i >= min_lookahead; i--) {

3647

BoyerMoorePositionInfo* map = bitmaps_->at(i);

3648

for (int j = 0; j < kSize; j++) {

3649

if (map->at(j)) {

3650

boolean_skip_table->set(j, kDontSkipArrayEntry);

3651

}

3652

}

3653

}

3654

3655

return skip;

3656

}

3657

3658

3659

// See comment above on the implementation of GetSkipTable.

3660

bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {

3661

const int kSize = RegExpMacroAssembler::kTableSize;

3662

3663

int min_lookahead = 0;

3664

int max_lookahead = 0;

3665

3666

if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false;

3667

3668

bool found_single_character = false;

3669

int single_character = 0;

3670

for (int i = max_lookahead; i >= min_lookahead; i--) {

3671

BoyerMoorePositionInfo* map = bitmaps_->at(i);

3672

if (map->map_count() > 1 ||

3673

(found_single_character && map->map_count() != 0)) {

3674

found_single_character = false;

3675

break;

3676

}

3677

for (int j = 0; j < kSize; j++) {

3678

if (map->at(j)) {

3679

found_single_character = true;

3680

single_character = j;

3681

break;

3682

}

3683

}

3684

}

3685

3686

int lookahead_width = max_lookahead + 1 - min_lookahead;

3687

3688

if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {

3689

// The mask-compare can probably handle this better.

3690

return false;

3691

}

3692

3693

if (found_single_character) {

3694

Label cont, again;

3695

masm->Bind(&again);

3696

masm->LoadCurrentCharacter(max_lookahead, &cont, true);

3697

if (max_char_ > kSize) {

3698

masm->CheckCharacterAfterAnd(single_character,

3699

RegExpMacroAssembler::kTableMask,

3700

&cont);

3701

} else {

3702

masm->CheckCharacter(single_character, &cont);

3703

}

3704

masm->AdvanceCurrentPosition(lookahead_width);

3705

masm->GoTo(&again);

3706

masm->Bind(&cont);

3707

return true;

3708

}

3709

3710

Handle<ByteArray> boolean_skip_table =

3711

FACTORY->NewByteArray(kSize, TENURED);

3712

int skip_distance = GetSkipTable(

3713

min_lookahead, max_lookahead, boolean_skip_table);

3714

ASSERT(skip_distance != 0);

3715

3716

Label cont, again;

3717

masm->Bind(&again);

3718

masm->LoadCurrentCharacter(max_lookahead, &cont, true);

3719

masm->CheckBitInTable(boolean_skip_table, &cont);

3720

masm->AdvanceCurrentPosition(skip_distance);

3721

masm->GoTo(&again);

3722

masm->Bind(&cont);

3723

3724

return true;

3725

}

3726

3727

3728

/* Code generation for choice nodes.

3729

3730

* We generate quick checks that do a mask and compare to eliminate a

3731

* choice. If the quick check succeeds then it jumps to the continuation to

3732

* do slow checks and check subsequent nodes. If it fails (the common case)

3733

* it falls through to the next choice.

3734

3735

* Here is the desired flow graph. Nodes directly below each other imply

3736

* fallthrough. Alternatives 1 and 2 have quick checks. Alternative

3737

* 3 doesn't have a quick check so we have to call the slow check.

3738

* Nodes are marked Qn for quick checks and Sn for slow checks. The entire

3739

* regexp continuation is generated directly after the Sn node, up to the

3740

* next GoTo if we decide to reuse some already generated code. Some

3741

* nodes expect preload_characters to be preloaded into the current

3742

* character register. R nodes do this preloading. Vertices are marked

3743

* F for failures and S for success (possible success in the case of quick

3744

* nodes). L, V, < and > are used as arrow heads.

3745

3746

* ----------> R

3747

* |

3748

* V

3749

* Q1 -----> S1

3750

* | S /

3751

* F| /

3752

* | F/

3753

* | /

3754

* | R

3755

* | /

3756

* V L

3757

* Q2 -----> S2

3758

* | S /

3759

* F| /

3760

* | F/

3761

* | /

3762

* | R

3763

* | /

3764

* V L

3765

* S3

3766

* |

3767

* F|

3768

* |

3769

* R

3770

* |

3771

* backtrack V

3772

* <----------Q4

3773

* \ F |

3774

* \ |S

3775

* \ F V

3776

* \-----S4

3777

3778

* For greedy loops we reverse our expectation and expect to match rather

3779

* than fail. Therefore we want the loop code to look like this (U is the

3780

* unwind code that steps back in the greedy loop). The following alternatives

3781

* look the same as above.

3782

* _____

3783

* / \

3784

* V |

3785

* ----------> S1 |

3786

* /| |

3787

* / |S |

3788

* F/ \_____/

3789

* /

3790

* |<-----------

3791

* | \

3792

* V \

3793

* Q2 ---> S2 \

3794

* | S / |

3795

* F| / |

3796

* | F/ |

3797

* | / |

3798

* | R |

3799

* | / |

3800

* F VL |

3801

* <------U |

3802

* back |S |

3803

* \______________/

3804

3805

3806

void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

3807

RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

3808

int choice_count = alternatives_->length();

3809

#ifdef DEBUG

3810

for (int i = 0; i < choice_count - 1; i++) {

3811

GuardedAlternative alternative = alternatives_->at(i);

3812

ZoneList<Guard*>* guards = alternative.guards();

3813

int guard_count = (guards == NULL) ? 0 : guards->length();

3814

for (int j = 0; j < guard_count; j++) {

3815

ASSERT(!trace->mentions_reg(guards->at(j)->reg()));

3816

}

3817

}

3818

#endif

3819

3820

LimitResult limit_result = LimitVersions(compiler, trace);

3821

if (limit_result == DONE) return;

3822

ASSERT(limit_result == CONTINUE);

3823

3824

int new_flush_budget = trace->flush_budget() / choice_count;

3825

if (trace->flush_budget() == 0 && trace->actions() != NULL) {

3826

trace->Flush(compiler, this);

3827

return;

3828

}

3829

3830

RecursionCheck rc(compiler);

3831

3832

Trace* current_trace = trace;

3833

3834

int text_length = GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));

3835

bool greedy_loop = false;

3836

Label greedy_loop_label;

3837

Trace counter_backtrack_trace;

3838

counter_backtrack_trace.set_backtrack(&greedy_loop_label);

3839

if (not_at_start()) counter_backtrack_trace.set_at_start(false);

3840

3841

if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {

3842

// Here we have special handling for greedy loops containing only text nodes

3843

// and other simple nodes. These are handled by pushing the current

3844

// position on the stack and then incrementing the current position each

3845

// time around the switch. On backtrack we decrement the current position

3846

// and check it against the pushed value. This avoids pushing backtrack

3847

// information for each iteration of the loop, which could take up a lot of

3848

// space.

3849

greedy_loop = true;

3850

ASSERT(trace->stop_node() == NULL);

3851

macro_assembler->PushCurrentPosition();

3852

current_trace = &counter_backtrack_trace;

3853

Label greedy_match_failed;

3854

Trace greedy_match_trace;

3855

if (not_at_start()) greedy_match_trace.set_at_start(false);

3856

greedy_match_trace.set_backtrack(&greedy_match_failed);

3857

Label loop_label;

3858

macro_assembler->Bind(&loop_label);

3859

greedy_match_trace.set_stop_node(this);

3860

greedy_match_trace.set_loop_label(&loop_label);

3861

alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);

3862

macro_assembler->Bind(&greedy_match_failed);

3863

}

3864

3865

Label second_choice; // For use in greedy matches.

3866

macro_assembler->Bind(&second_choice);

3867

3868

int first_normal_choice = greedy_loop ? 1 : 0;

3869

3870

bool not_at_start = current_trace->at_start() == Trace::FALSE;

3871

const int kEatsAtLeastNotYetInitialized = -1;

3872

int eats_at_least = kEatsAtLeastNotYetInitialized;

3873

3874

bool skip_was_emitted = false;

3875

3876

if (!greedy_loop && choice_count == 2) {

3877

GuardedAlternative alt1 = alternatives_->at(1);

3878

if (alt1.guards() == NULL || alt1.guards()->length() == 0) {

3879

RegExpNode* eats_anything_node = alt1.node();

3880

if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==

3881

this) {

3882

// At this point we know that we are at a non-greedy loop that will eat

3883

// any character one at a time. Any non-anchored regexp has such a

3884

// loop prepended to it in order to find where it starts. We look for

3885

// a pattern of the form ...abc... where we can look 6 characters ahead

3886

// and step forwards 3 if the character is not one of abc. Abc need

3887

// not be atoms, they can be any reasonably limited character class or

3888

// small alternation.

3889

ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes.

3890

BoyerMooreLookahead* lookahead = bm_info(not_at_start);

3891

if (lookahead == NULL) {

3892

eats_at_least =

3893

Min(kMaxLookaheadForBoyerMoore,

3894

EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));

3895

if (eats_at_least >= 1) {

3896

BoyerMooreLookahead* bm =

3897

new(zone()) BoyerMooreLookahead(eats_at_least,

3898

compiler,

3899

zone());

3900

GuardedAlternative alt0 = alternatives_->at(0);

3901

alt0.node()->FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);

3902

skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);

3903

}

3904

} else {

3905

skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);

3906

}

3907

}

3908

}

3909

}

3910

3911

if (eats_at_least == kEatsAtLeastNotYetInitialized) {

3912

// Save some time by looking at most one machine word ahead.

3913

eats_at_least = EatsAtLeast(compiler->ascii() ? 4 : 2, 0, not_at_start);

3914

}

3915

int preload_characters = CalculatePreloadCharacters(compiler, eats_at_least);

3916

3917

bool preload_is_current = !skip_was_emitted &&

3918

(current_trace->characters_preloaded() == preload_characters);

3919

bool preload_has_checked_bounds = preload_is_current;

3920

3921

AlternativeGenerationList alt_gens(choice_count, zone());

3922

3923

// For now we just call all choices one after the other. The idea ultimately

3924

// is to use the Dispatch table to try only the relevant ones.

3925

for (int i = first_normal_choice; i < choice_count; i++) {

3926

GuardedAlternative alternative = alternatives_->at(i);

3927

AlternativeGeneration* alt_gen = alt_gens.at(i);

3928

alt_gen->quick_check_details.set_characters(preload_characters);

3929

ZoneList<Guard*>* guards = alternative.guards();

3930

int guard_count = (guards == NULL) ? 0 : guards->length();

3931

Trace new_trace(*current_trace);

3932

new_trace.set_characters_preloaded(preload_is_current ?

3933

preload_characters :

3934

0);

3935

if (preload_has_checked_bounds) {

3936

new_trace.set_bound_checked_up_to(preload_characters);

3937

}

3938

new_trace.quick_check_performed()->Clear();

3939

if (not_at_start_) new_trace.set_at_start(Trace::FALSE);

3940

alt_gen->expects_preload = preload_is_current;

3941

bool generate_full_check_inline = false;

3942

if (FLAG_regexp_optimization &&

3943

try_to_emit_quick_check_for_alternative(i) &&

3944

alternative.node()->EmitQuickCheck(compiler,

3945

&new_trace,

3946

preload_has_checked_bounds,

3947

&alt_gen->possible_success,

3948

&alt_gen->quick_check_details,

3949

i < choice_count - 1)) {

3950

// Quick check was generated for this choice.

3951

preload_is_current = true;

3952

preload_has_checked_bounds = true;

3953

// On the last choice in the ChoiceNode we generated the quick

3954

// check to fall through on possible success. So now we need to

3955

// generate the full check inline.

3956

if (i == choice_count - 1) {

3957

macro_assembler->Bind(&alt_gen->possible_success);

3958

new_trace.set_quick_check_performed(&alt_gen->quick_check_details);

3959

new_trace.set_characters_preloaded(preload_characters);

3960

new_trace.set_bound_checked_up_to(preload_characters);

3961

generate_full_check_inline = true;

3962

}

3963

} else if (alt_gen->quick_check_details.cannot_match()) {

3964

if (i == choice_count - 1 && !greedy_loop) {

3965

macro_assembler->GoTo(trace->backtrack());

3966

}

3967

continue;

3968

} else {

3969

// No quick check was generated. Put the full code here.

3970

// If this is not the first choice then there could be slow checks from

3971

// previous cases that go here when they fail. There's no reason to

3972

// insist that they preload characters since the slow check we are about

3973

// to generate probably can't use it.

3974

if (i != first_normal_choice) {

3975

alt_gen->expects_preload = false;

3976

new_trace.InvalidateCurrentCharacter();

3977

}

3978

if (i < choice_count - 1) {

3979

new_trace.set_backtrack(&alt_gen->after);

3980

}

3981

generate_full_check_inline = true;

3982

}

3983

if (generate_full_check_inline) {

3984

if (new_trace.actions() != NULL) {

3985

new_trace.set_flush_budget(new_flush_budget);

3986

}

3987

for (int j = 0; j < guard_count; j++) {

3988

GenerateGuard(macro_assembler, guards->at(j), &new_trace);

3989

}

3990

alternative.node()->Emit(compiler, &new_trace);

3991

preload_is_current = false;

3992

}

3993

macro_assembler->Bind(&alt_gen->after);

3994

}

3995

if (greedy_loop) {

3996

macro_assembler->Bind(&greedy_loop_label);

3997

// If we have unwound to the bottom then backtrack.

3998

macro_assembler->CheckGreedyLoop(trace->backtrack());

3999

// Otherwise try the second priority at an earlier position.

4000

macro_assembler->AdvanceCurrentPosition(-text_length);

4001

macro_assembler->GoTo(&second_choice);

4002

}

4003

4004

// At this point we need to generate slow checks for the alternatives where

4005

// the quick check was inlined. We can recognize these because the associated

4006

// label was bound.

4007

for (int i = first_normal_choice; i < choice_count - 1; i++) {

4008

AlternativeGeneration* alt_gen = alt_gens.at(i);

4009

Trace new_trace(*current_trace);

4010

// If there are actions to be flushed we have to limit how many times

4011

// they are flushed. Take the budget of the parent trace and distribute

4012

// it fairly amongst the children.

4013

if (new_trace.actions() != NULL) {

4014

new_trace.set_flush_budget(new_flush_budget);

4015

}

4016

EmitOutOfLineContinuation(compiler,

4017

&new_trace,

4018

alternatives_->at(i),

4019

alt_gen,

4020

preload_characters,

4021

alt_gens.at(i + 1)->expects_preload);

4022

}

4023

}

4024

4025

4026

void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,

4027

Trace* trace,

4028

GuardedAlternative alternative,

4029

AlternativeGeneration* alt_gen,

4030

int preload_characters,

4031

bool next_expects_preload) {

4032

if (!alt_gen->possible_success.is_linked()) return;

4033

4034

RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

4035

macro_assembler->Bind(&alt_gen->possible_success);

4036

Trace out_of_line_trace(*trace);

4037

out_of_line_trace.set_characters_preloaded(preload_characters);

4038

out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);

4039

if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE);

4040

ZoneList<Guard*>* guards = alternative.guards();

4041

int guard_count = (guards == NULL) ? 0 : guards->length();

4042

if (next_expects_preload) {

4043

Label reload_current_char;

4044

out_of_line_trace.set_backtrack(&reload_current_char);

4045

for (int j = 0; j < guard_count; j++) {

4046

GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);

4047

}

4048

alternative.node()->Emit(compiler, &out_of_line_trace);

4049

macro_assembler->Bind(&reload_current_char);

4050

// Reload the current character, since the next quick check expects that.

4051

// We don't need to check bounds here because we only get into this

4052

// code through a quick check which already did the checked load.

4053

macro_assembler->LoadCurrentCharacter(trace->cp_offset(),

4054

NULL,

4055

false,

4056

preload_characters);

4057

macro_assembler->GoTo(&(alt_gen->after));

4058

} else {

4059

out_of_line_trace.set_backtrack(&(alt_gen->after));

4060

for (int j = 0; j < guard_count; j++) {

4061

GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);

4062

}

4063

alternative.node()->Emit(compiler, &out_of_line_trace);

4064

}

4065

}

4066

4067

4068

void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {

4069

RegExpMacroAssembler* assembler = compiler->macro_assembler();

4070

LimitResult limit_result = LimitVersions(compiler, trace);

4071

if (limit_result == DONE) return;

4072

ASSERT(limit_result == CONTINUE);

4073

4074

RecursionCheck rc(compiler);

4075

4076

switch (type_) {

4077

case STORE_POSITION: {

4078

Trace::DeferredCapture

4079

new_capture(data_.u_position_register.reg,

4080

data_.u_position_register.is_capture,

4081

trace);

4082

Trace new_trace = *trace;

4083

new_trace.add_action(&new_capture);

4084

on_success()->Emit(compiler, &new_trace);

4085

break;

4086

}

4087

case INCREMENT_REGISTER: {

4088

Trace::DeferredIncrementRegister

4089

new_increment(data_.u_increment_register.reg);

4090

Trace new_trace = *trace;

4091

new_trace.add_action(&new_increment);

4092

on_success()->Emit(compiler, &new_trace);

4093

break;

4094

}

4095

case SET_REGISTER: {

4096

Trace::DeferredSetRegister

4097

new_set(data_.u_store_register.reg, data_.u_store_register.value);

4098

Trace new_trace = *trace;

4099

new_trace.add_action(&new_set);

4100

on_success()->Emit(compiler, &new_trace);

4101

break;

4102

}

4103

case CLEAR_CAPTURES: {

4104

Trace::DeferredClearCaptures

4105

new_capture(Interval(data_.u_clear_captures.range_from,

4106

data_.u_clear_captures.range_to));

4107

Trace new_trace = *trace;

4108

new_trace.add_action(&new_capture);

4109

on_success()->Emit(compiler, &new_trace);

4110

break;

4111

}

4112

case BEGIN_SUBMATCH:

4113

if (!trace->is_trivial()) {

4114

trace->Flush(compiler, this);

4115

} else {

4116

assembler->WriteCurrentPositionToRegister(

4117

data_.u_submatch.current_position_register, 0);

4118

assembler->WriteStackPointerToRegister(

4119

data_.u_submatch.stack_pointer_register);

4120

on_success()->Emit(compiler, trace);

4121

}

4122

break;

4123

case EMPTY_MATCH_CHECK: {

4124

int start_pos_reg = data_.u_empty_match_check.start_register;

4125

int stored_pos = 0;

4126

int rep_reg = data_.u_empty_match_check.repetition_register;

4127

bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);

4128

bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);

4129

if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {

4130

// If we know we haven't advanced and there is no minimum we

4131

// can just backtrack immediately.

4132

assembler->GoTo(trace->backtrack());

4133

} else if (know_dist && stored_pos < trace->cp_offset()) {

4134

// If we know we've advanced we can generate the continuation

4135

// immediately.

4136

on_success()->Emit(compiler, trace);

4137

} else if (!trace->is_trivial()) {

4138

trace->Flush(compiler, this);

4139

} else {

4140

Label skip_empty_check;

4141

// If we have a minimum number of repetitions we check the current

4142

// number first and skip the empty check if it's not enough.

4143

if (has_minimum) {

4144

int limit = data_.u_empty_match_check.repetition_limit;

4145

assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);

4146

}

4147

// If the match is empty we bail out, otherwise we fall through

4148

// to the on-success continuation.

4149

assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,

4150

trace->backtrack());

4151

assembler->Bind(&skip_empty_check);

4152

on_success()->Emit(compiler, trace);

4153

}

4154

break;

4155

}

4156

case POSITIVE_SUBMATCH_SUCCESS: {

4157

if (!trace->is_trivial()) {

4158

trace->Flush(compiler, this);

4159

return;

4160

}

4161

assembler->ReadCurrentPositionFromRegister(

4162

data_.u_submatch.current_position_register);

4163

assembler->ReadStackPointerFromRegister(

4164

data_.u_submatch.stack_pointer_register);

4165

int clear_register_count = data_.u_submatch.clear_register_count;

4166

if (clear_register_count == 0) {

4167

on_success()->Emit(compiler, trace);

4168

return;

4169

}

4170

int clear_registers_from = data_.u_submatch.clear_register_from;

4171

Label clear_registers_backtrack;

4172

Trace new_trace = *trace;

4173

new_trace.set_backtrack(&clear_registers_backtrack);

4174

on_success()->Emit(compiler, &new_trace);

4175

4176

assembler->Bind(&clear_registers_backtrack);

4177

int clear_registers_to = clear_registers_from + clear_register_count - 1;

4178

assembler->ClearRegisters(clear_registers_from, clear_registers_to);

4179

4180

ASSERT(trace->backtrack() == NULL);

4181

assembler->Backtrack();

4182

return;

4183

}

4184

default:

4185

UNREACHABLE();

4186

}

4187

}

4188

4189

4190

void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

4191

RegExpMacroAssembler* assembler = compiler->macro_assembler();

4192

if (!trace->is_trivial()) {

4193

trace->Flush(compiler, this);

4194

return;

4195

}

4196

4197

LimitResult limit_result = LimitVersions(compiler, trace);

4198

if (limit_result == DONE) return;

4199

ASSERT(limit_result == CONTINUE);

4200

4201

RecursionCheck rc(compiler);

4202

4203

ASSERT_EQ(start_reg_ + 1, end_reg_);

4204

if (compiler->ignore_case()) {

4205

assembler->CheckNotBackReferenceIgnoreCase(start_reg_,

4206

trace->backtrack());

4207

} else {

4208

assembler->CheckNotBackReference(start_reg_, trace->backtrack());

4209

}

4210

on_success()->Emit(compiler, trace);

4211

}

4212

4213

4214

// -------------------------------------------------------------------

4215

// Dot/dotty output

4216

4217

4218

#ifdef DEBUG

4219

4220

4221

class DotPrinter: public NodeVisitor {

4222

public:

4223

explicit DotPrinter(bool ignore_case)

4224

: ignore_case_(ignore_case),

4225

stream_(&alloc_) { }

4226

void PrintNode(const char* label, RegExpNode* node);

4227

void Visit(RegExpNode* node);

4228

void PrintAttributes(RegExpNode* from);

4229

StringStream* stream() { return &stream_; }

4230

void PrintOnFailure(RegExpNode* from, RegExpNode* to);

4231

#define DECLARE_VISIT(Type) \

4232

virtual void Visit##Type(Type##Node* that);

4233

FOR_EACH_NODE_TYPE(DECLARE_VISIT)

4234

#undef DECLARE_VISIT

4235

private:

4236

bool ignore_case_;

4237

HeapStringAllocator alloc_;

4238

StringStream stream_;

4239

};

4240

4241

4242

void DotPrinter::PrintNode(const char* label, RegExpNode* node) {

4243

stream()->Add("digraph G {\n graph [label=\"");

4244

for (int i = 0; label[i]; i++) {

4245

switch (label[i]) {

4246

case '\\':

4247

stream()->Add("\\\\");

4248

break;

4249

case '"':

4250

stream()->Add("\"");

4251

break;

4252

default:

4253

stream()->Put(label[i]);

4254

break;

4255

}

4256

}

4257

stream()->Add("\"];\n");

4258

Visit(node);

4259

stream()->Add("}\n");

4260

printf("%s", *(stream()->ToCString()));

4261

}

4262

4263

4264

void DotPrinter::Visit(RegExpNode* node) {

4265

if (node->info()->visited) return;

4266

node->info()->visited = true;

4267

node->Accept(this);

4268

}

4269

4270

4271

void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {

4272

stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure);

4273

Visit(on_failure);

4274

}

4275

4276

4277

class TableEntryBodyPrinter {

4278

public:

4279

TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice)

4280

: stream_(stream), choice_(choice) { }

4281

void Call(uc16 from, DispatchTable::Entry entry) {

4282

OutSet* out_set = entry.out_set();

4283

for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {

4284

if (out_set->Get(i)) {

4285

stream()->Add(" n%p:s%io%i -> n%p;\n",

4286

choice(),

4287

from,

4288

4289

choice()->alternatives()->at(i).node());

4290

}

4291

}

4292

}

4293

private:

4294

StringStream* stream() { return stream_; }

4295

ChoiceNode* choice() { return choice_; }

4296

StringStream* stream_;

4297

ChoiceNode* choice_;

4298

};

4299

4300

4301

class TableEntryHeaderPrinter {

4302

public:

4303

explicit TableEntryHeaderPrinter(StringStream* stream)

4304

: first_(true), stream_(stream) { }

4305

void Call(uc16 from, DispatchTable::Entry entry) {

4306

if (first_) {

4307

first_ = false;

4308

} else {

4309

stream()->Add("|");

4310

}

4311

stream()->Add("{\\%k-\\%k|{", from, entry.to());

4312

OutSet* out_set = entry.out_set();

4313

int priority = 0;

4314

for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {

4315

if (out_set->Get(i)) {

4316

if (priority > 0) stream()->Add("|");

4317

stream()->Add("<s%io%i> %i", from, i, priority);

4318

priority++;

4319

}

4320

}

4321

stream()->Add("}}");

4322

}

4323

4324

private:

4325

bool first_;

4326

StringStream* stream() { return stream_; }

4327

StringStream* stream_;

4328

};

4329

4330

4331

class AttributePrinter {

4332

public:

4333

explicit AttributePrinter(DotPrinter* out)

4334

: out_(out), first_(true) { }

4335

void PrintSeparator() {

4336

if (first_) {

4337

first_ = false;

4338

} else {

4339

out_->stream()->Add("|");

4340

}

4341

}

4342

void PrintBit(const char* name, bool value) {

4343

if (!value) return;

4344

PrintSeparator();

4345

out_->stream()->Add("{%s}", name);

4346

}

4347

void PrintPositive(const char* name, int value) {

4348

if (value < 0) return;

4349

PrintSeparator();

4350

out_->stream()->Add("{%s|%x}", name, value);

4351

}

4352

private:

4353

DotPrinter* out_;

4354

bool first_;

4355

};

4356

4357

4358

void DotPrinter::PrintAttributes(RegExpNode* that) {

4359

stream()->Add(" a%p [shape=Mrecord, color=grey, fontcolor=grey, "

4360

"margin=0.1, fontsize=10, label=\"{",

4361

that);

4362

AttributePrinter printer(this);

4363

NodeInfo* info = that->info();

4364

printer.PrintBit("NI", info->follows_newline_interest);

4365

printer.PrintBit("WI", info->follows_word_interest);

4366

printer.PrintBit("SI", info->follows_start_interest);

4367

Label* label = that->label();

4368

if (label->is_bound())

4369

printer.PrintPositive("@", label->pos());

4370

stream()->Add("}\"];\n");

4371

stream()->Add(" a%p -> n%p [style=dashed, color=grey, "

4372

"arrowhead=none];\n", that, that);

4373

}

4374

4375

4376

static const bool kPrintDispatchTable = false;

4377

void DotPrinter::VisitChoice(ChoiceNode* that) {

4378

if (kPrintDispatchTable) {

4379

stream()->Add(" n%p [shape=Mrecord, label=\"", that);

4380

TableEntryHeaderPrinter header_printer(stream());

4381

that->GetTable(ignore_case_)->ForEach(&header_printer);

4382

stream()->Add("\"]\n", that);

4383

PrintAttributes(that);

4384

TableEntryBodyPrinter body_printer(stream(), that);

4385

that->GetTable(ignore_case_)->ForEach(&body_printer);

4386

} else {

4387

stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that);

4388

for (int i = 0; i < that->alternatives()->length(); i++) {

4389

GuardedAlternative alt = that->alternatives()->at(i);

4390

stream()->Add(" n%p -> n%p;\n", that, alt.node());

4391

}

4392

}

4393

for (int i = 0; i < that->alternatives()->length(); i++) {

4394

GuardedAlternative alt = that->alternatives()->at(i);

4395

alt.node()->Accept(this);

4396

}

4397

}

4398

4399

4400

void DotPrinter::VisitText(TextNode* that) {

4401

Zone* zone = that->zone();

4402

stream()->Add(" n%p [label=\"", that);

4403

for (int i = 0; i < that->elements()->length(); i++) {

4404

if (i > 0) stream()->Add(" ");

4405

TextElement elm = that->elements()->at(i);

4406

switch (elm.type) {

4407

case TextElement::ATOM: {

4408

stream()->Add("'%w'", elm.data.u_atom->data());

4409

break;

4410

}

4411

case TextElement::CHAR_CLASS: {

4412

RegExpCharacterClass* node = elm.data.u_char_class;

4413

stream()->Add("[");

4414

if (node->is_negated())

4415

stream()->Add("^");

4416

for (int j = 0; j < node->ranges(zone)->length(); j++) {

4417

CharacterRange range = node->ranges(zone)->at(j);

4418

stream()->Add("%k-%k", range.from(), range.to());

4419

}

4420

stream()->Add("]");

4421

break;

4422

}

4423

default:

4424

UNREACHABLE();

4425

}

4426

}

4427

stream()->Add("\", shape=box, peripheries=2];\n");

4428

PrintAttributes(that);

4429

stream()->Add(" n%p -> n%p;\n", that, that->on_success());

4430

Visit(that->on_success());

4431

}

4432

4433

4434

void DotPrinter::VisitBackReference(BackReferenceNode* that) {

4435

stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n",

4436

that,

4437

that->start_register(),

4438

that->end_register());

4439

PrintAttributes(that);

4440

stream()->Add(" n%p -> n%p;\n", that, that->on_success());

4441

Visit(that->on_success());

4442

}

4443

4444

4445

void DotPrinter::VisitEnd(EndNode* that) {

4446

stream()->Add(" n%p [style=bold, shape=point];\n", that);

4447

PrintAttributes(that);

4448

}

4449

4450

4451

void DotPrinter::VisitAssertion(AssertionNode* that) {

4452

stream()->Add(" n%p [", that);

4453

switch (that->type()) {

4454

case AssertionNode::AT_END:

4455

stream()->Add("label=\"$\", shape=septagon");

4456

break;

4457

case AssertionNode::AT_START:

4458

stream()->Add("label=\"^\", shape=septagon");

4459

break;

4460

case AssertionNode::AT_BOUNDARY:

4461

stream()->Add("label=\"\\b\", shape=septagon");

4462

break;

4463

case AssertionNode::AT_NON_BOUNDARY:

4464

stream()->Add("label=\"\\B\", shape=septagon");

4465

break;

4466

case AssertionNode::AFTER_NEWLINE:

4467

stream()->Add("label=\"(?<=\\n)\", shape=septagon");

4468

break;

4469

}

4470

stream()->Add("];\n");

4471

PrintAttributes(that);

4472

RegExpNode* successor = that->on_success();

4473

stream()->Add(" n%p -> n%p;\n", that, successor);

4474

Visit(successor);

4475

}

4476

4477

4478

void DotPrinter::VisitAction(ActionNode* that) {

4479

stream()->Add(" n%p [", that);

4480

switch (that->type_) {

4481

case ActionNode::SET_REGISTER:

4482

stream()->Add("label=\"$%i:=%i\", shape=octagon",

4483

that->data_.u_store_register.reg,

4484

that->data_.u_store_register.value);

4485

break;

4486

case ActionNode::INCREMENT_REGISTER:

4487

stream()->Add("label=\"$%i++\", shape=octagon",

4488

that->data_.u_increment_register.reg);

4489

break;

4490

case ActionNode::STORE_POSITION:

4491

stream()->Add("label=\"$%i:=$pos\", shape=octagon",

4492

that->data_.u_position_register.reg);

4493

break;

4494

case ActionNode::BEGIN_SUBMATCH:

4495

stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon",

4496

that->data_.u_submatch.current_position_register);

4497

break;

4498

case ActionNode::POSITIVE_SUBMATCH_SUCCESS:

4499

stream()->Add("label=\"escape\", shape=septagon");

4500

break;

4501

case ActionNode::EMPTY_MATCH_CHECK:

4502

stream()->Add("label=\"$%i=$pos?,$%i<%i?\", shape=septagon",

4503

that->data_.u_empty_match_check.start_register,

4504

that->data_.u_empty_match_check.repetition_register,

4505

that->data_.u_empty_match_check.repetition_limit);

4506

break;

4507

case ActionNode::CLEAR_CAPTURES: {

4508

stream()->Add("label=\"clear $%i to $%i\", shape=septagon",

4509

that->data_.u_clear_captures.range_from,

4510

that->data_.u_clear_captures.range_to);

4511

break;

4512

}

4513

}

4514

stream()->Add("];\n");

4515

PrintAttributes(that);

4516

RegExpNode* successor = that->on_success();

4517

stream()->Add(" n%p -> n%p;\n", that, successor);

4518

Visit(successor);

4519

}

4520

4521

4522

class DispatchTableDumper {

4523

public:

4524

explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { }

4525

void Call(uc16 key, DispatchTable::Entry entry);

4526

StringStream* stream() { return stream_; }

4527

private:

4528

StringStream* stream_;

4529

};

4530

4531

4532

void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) {

4533

stream()->Add("[%k-%k]: {", key, entry.to());

4534

OutSet* set = entry.out_set();

4535

bool first = true;

4536

for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {

4537

if (set->Get(i)) {

4538

if (first) {

4539

first = false;

4540

} else {

4541

stream()->Add(", ");

4542

}

4543

stream()->Add("%i", i);

4544

}

4545

}

4546

stream()->Add("}\n");

4547

}

4548

4549

4550

void DispatchTable::Dump() {

4551

HeapStringAllocator alloc;

4552

StringStream stream(&alloc);

4553

DispatchTableDumper dumper(&stream);

4554

tree()->ForEach(&dumper);

4555

OS::PrintError("%s", *stream.ToCString());

4556

}

4557

4558

4559

void RegExpEngine::DotPrint(const char* label,

4560

RegExpNode* node,

4561

bool ignore_case) {

4562

DotPrinter printer(ignore_case);

4563

printer.PrintNode(label, node);

4564

}

4565

4566

4567

#endif // DEBUG

4568

4569

4570

// -------------------------------------------------------------------

4571

// Tree to graph conversion

4572

4573

RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,

4574

RegExpNode* on_success) {

4575

ZoneList<TextElement>* elms =

4576

new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone());

4577

elms->Add(TextElement::Atom(this), compiler->zone());

4578

return new(compiler->zone()) TextNode(elms, on_success);

4579

}

4580

4581

4582

RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,

4583

RegExpNode* on_success) {

4584

return new(compiler->zone()) TextNode(elements(), on_success);

4585

}

4586

4587

4588

static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,

4589

const int* special_class,

4590

int length) {

4591

length--; // Remove final 0x10000.

4592

ASSERT(special_class[length] == 0x10000);

4593

ASSERT(ranges->length() != 0);

4594

ASSERT(length != 0);

4595

ASSERT(special_class[0] != 0);

4596

if (ranges->length() != (length >> 1) + 1) {

4597

return false;

4598

}

4599

CharacterRange range = ranges->at(0);

4600

if (range.from() != 0) {

4601

return false;

4602

}

4603

for (int i = 0; i < length; i += 2) {

4604

if (special_class[i] != (range.to() + 1)) {

4605

return false;

4606

}

4607

range = ranges->at((i >> 1) + 1);

4608

if (special_class[i+1] != range.from()) {

4609

return false;

4610

}

4611

}

4612

if (range.to() != 0xffff) {

4613

return false;

4614

}

4615

return true;

4616

}

4617

4618

4619

static bool CompareRanges(ZoneList<CharacterRange>* ranges,

4620

const int* special_class,

4621

int length) {

4622

length--; // Remove final 0x10000.

4623

ASSERT(special_class[length] == 0x10000);

4624

if (ranges->length() * 2 != length) {

4625

return false;

4626

}

4627

for (int i = 0; i < length; i += 2) {

4628

CharacterRange range = ranges->at(i >> 1);

4629

if (range.from() != special_class[i] ||

4630

range.to() != special_class[i + 1] - 1) {

4631

return false;

4632

}

4633

}

4634

return true;

4635

}

4636

4637

4638

bool RegExpCharacterClass::is_standard(Zone* zone) {

4639

// TODO(lrn): Remove need for this function, by not throwing away information

4640

// along the way.

4641

if (is_negated_) {

4642

return false;

4643

}

4644

if (set_.is_standard()) {

4645

return true;

4646

}

4647

if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {

4648

set_.set_standard_set_type('s');

4649

return true;

4650

}

4651

if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {

4652

set_.set_standard_set_type('S');

4653

return true;

4654

}

4655

if (CompareInverseRanges(set_.ranges(zone),

4656

kLineTerminatorRanges,

4657

kLineTerminatorRangeCount)) {

4658

set_.set_standard_set_type('.');

4659

return true;

4660

}

4661

if (CompareRanges(set_.ranges(zone),

4662

kLineTerminatorRanges,

4663

kLineTerminatorRangeCount)) {

4664

set_.set_standard_set_type('n');

4665

return true;

4666

}

4667

if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {

4668

set_.set_standard_set_type('w');

4669

return true;

4670

}

4671

if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {

4672

set_.set_standard_set_type('W');

4673

return true;

4674

}

4675

return false;

4676

}

4677

4678

4679

RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,

4680

RegExpNode* on_success) {

4681

return new(compiler->zone()) TextNode(this, on_success);

4682

}

4683

4684

4685

RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,

4686

RegExpNode* on_success) {

4687

ZoneList<RegExpTree*>* alternatives = this->alternatives();

4688

int length = alternatives->length();

4689

ChoiceNode* result =

4690

new(compiler->zone()) ChoiceNode(length, compiler->zone());

4691

for (int i = 0; i < length; i++) {

4692

GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler,

4693

on_success));

4694

result->AddAlternative(alternative);

4695

}

4696

return result;

4697

}

4698

4699

4700

RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,

4701

RegExpNode* on_success) {

4702

return ToNode(min(),

4703

max(),

4704

is_greedy(),

4705

body(),

4706

compiler,

4707

on_success);

4708

}

4709

4710

4711

// Scoped object to keep track of how much we unroll quantifier loops in the

4712

// regexp graph generator.

4713

class RegExpExpansionLimiter {

4714

public:

4715

static const int kMaxExpansionFactor = 6;

4716

RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)

4717

: compiler_(compiler),

4718

saved_expansion_factor_(compiler->current_expansion_factor()),

4719

ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {

4720

ASSERT(factor > 0);

4721

if (ok_to_expand_) {

4722

if (factor > kMaxExpansionFactor) {

4723

// Avoid integer overflow of the current expansion factor.

4724

ok_to_expand_ = false;

4725

compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);

4726

} else {

4727

int new_factor = saved_expansion_factor_ * factor;

4728

ok_to_expand_ = (new_factor <= kMaxExpansionFactor);

4729

compiler->set_current_expansion_factor(new_factor);

4730

}

4731

}

4732

}

4733

4734

~RegExpExpansionLimiter() {

4735

compiler_->set_current_expansion_factor(saved_expansion_factor_);

4736

}

4737

4738

bool ok_to_expand() { return ok_to_expand_; }

4739

4740

private:

4741

RegExpCompiler* compiler_;

4742

int saved_expansion_factor_;

4743

bool ok_to_expand_;

4744

4745

DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);

4746

};

4747

4748

4749

RegExpNode* RegExpQuantifier::ToNode(int min,

4750

int max,

4751

bool is_greedy,

4752

RegExpTree* body,

4753

RegExpCompiler* compiler,

4754

RegExpNode* on_success,

4755

bool not_at_start) {

4756

// x{f, t} becomes this:

4757

4758

// (r++)<-.

4759

// | `

4760

// | (x)

4761

// v ^

4762

// (r=0)-->(?)---/ [if r < t]

4763

// |

4764

// [if r >= f] \----> ...

4765

4766

4767

// 15.10.2.5 RepeatMatcher algorithm.

4768

// The parser has already eliminated the case where max is 0. In the case

4769

// where max_match is zero the parser has removed the quantifier if min was

4770

// > 0 and removed the atom if min was 0. See AddQuantifierToAtom.

4771

4772

// If we know that we cannot match zero length then things are a little

4773

// simpler since we don't need to make the special zero length match check

4774

// from step 2.1. If the min and max are small we can unroll a little in

4775

// this case.

4776

static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,}

4777

static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3}

4778

if (max == 0) return on_success; // This can happen due to recursion.

4779

bool body_can_be_empty = (body->min_match() == 0);

4780

int body_start_reg = RegExpCompiler::kNoRegister;

4781

Interval capture_registers = body->CaptureRegisters();

4782

bool needs_capture_clearing = !capture_registers.is_empty();

4783

Zone* zone = compiler->zone();

4784

4785

if (body_can_be_empty) {

4786

body_start_reg = compiler->AllocateRegister();

4787

} else if (FLAG_regexp_optimization && !needs_capture_clearing) {

4788

// Only unroll if there are no captures and the body can't be

4789

// empty.

4790

{

4791

RegExpExpansionLimiter limiter(

4792

compiler, min + ((max != min) ? 1 : 0));

4793

if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {

4794

int new_max = (max == kInfinity) ? max : max - min;

4795

// Recurse once to get the loop or optional matches after the fixed

4796

// ones.

4797

RegExpNode* answer = ToNode(

4798

0, new_max, is_greedy, body, compiler, on_success, true);

4799

// Unroll the forced matches from 0 to min. This can cause chains of

4800

// TextNodes (which the parser does not generate). These should be

4801

// combined if it turns out they hinder good code generation.

4802

for (int i = 0; i < min; i++) {

4803

answer = body->ToNode(compiler, answer);

4804

}

4805

return answer;

4806

}

4807

}

4808

if (max <= kMaxUnrolledMaxMatches && min == 0) {

4809

ASSERT(max > 0); // Due to the 'if' above.

4810

RegExpExpansionLimiter limiter(compiler, max);

4811

if (limiter.ok_to_expand()) {

4812

// Unroll the optional matches up to max.

4813

RegExpNode* answer = on_success;

4814

for (int i = 0; i < max; i++) {

4815

ChoiceNode* alternation = new(zone) ChoiceNode(2, zone);

4816

if (is_greedy) {

4817

alternation->AddAlternative(

4818

GuardedAlternative(body->ToNode(compiler, answer)));

4819

alternation->AddAlternative(GuardedAlternative(on_success));

4820

} else {

4821

alternation->AddAlternative(GuardedAlternative(on_success));

4822

alternation->AddAlternative(

4823

GuardedAlternative(body->ToNode(compiler, answer)));

4824

}

4825

answer = alternation;

4826

if (not_at_start) alternation->set_not_at_start();

4827

}

4828

return answer;

4829

}

4830

}

4831

}

4832

bool has_min = min > 0;

4833

bool has_max = max < RegExpTree::kInfinity;

4834

bool needs_counter = has_min || has_max;

4835

int reg_ctr = needs_counter

4836

? compiler->AllocateRegister()

4837

: RegExpCompiler::kNoRegister;

4838

LoopChoiceNode* center = new(zone) LoopChoiceNode(body->min_match() == 0,

4839

zone);

4840

if (not_at_start) center->set_not_at_start();

4841

RegExpNode* loop_return = needs_counter

4842

? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))

4843

: static_cast<RegExpNode*>(center);

4844

if (body_can_be_empty) {

4845

// If the body can be empty we need to check if it was and then

4846

// backtrack.

4847

loop_return = ActionNode::EmptyMatchCheck(body_start_reg,

4848

reg_ctr,

4849

min,

4850

loop_return);

4851

}

4852

RegExpNode* body_node = body->ToNode(compiler, loop_return);

4853

if (body_can_be_empty) {

4854

// If the body can be empty we need to store the start position

4855

// so we can bail out if it was empty.

4856

body_node = ActionNode::StorePosition(body_start_reg, false, body_node);

4857

}

4858

if (needs_capture_clearing) {

4859

// Before entering the body of this loop we need to clear captures.

4860

body_node = ActionNode::ClearCaptures(capture_registers, body_node);

4861

}

4862

GuardedAlternative body_alt(body_node);

4863

if (has_max) {

4864

Guard* body_guard =

4865

new(zone) Guard(reg_ctr, Guard::LT, max);

4866

body_alt.AddGuard(body_guard, zone);

4867

}

4868

GuardedAlternative rest_alt(on_success);

4869

if (has_min) {

4870

Guard* rest_guard = new(compiler->zone()) Guard(reg_ctr, Guard::GEQ, min);

4871

rest_alt.AddGuard(rest_guard, zone);

4872

}

4873

if (is_greedy) {

4874

center->AddLoopAlternative(body_alt);

4875

center->AddContinueAlternative(rest_alt);

4876

} else {

4877

center->AddContinueAlternative(rest_alt);

4878

center->AddLoopAlternative(body_alt);

4879

}

4880

if (needs_counter) {

4881

return ActionNode::SetRegister(reg_ctr, 0, center);

4882

} else {

4883

return center;

4884

}

4885

}

4886

4887

4888

RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,

4889

RegExpNode* on_success) {

4890

NodeInfo info;

4891

Zone* zone = compiler->zone();

4892

4893

switch (type()) {

4894

case START_OF_LINE:

4895

return AssertionNode::AfterNewline(on_success);

4896

case START_OF_INPUT:

4897

return AssertionNode::AtStart(on_success);

4898

case BOUNDARY:

4899

return AssertionNode::AtBoundary(on_success);

4900

case NON_BOUNDARY:

4901

return AssertionNode::AtNonBoundary(on_success);

4902

case END_OF_INPUT:

4903

return AssertionNode::AtEnd(on_success);

4904

case END_OF_LINE: {

4905

// Compile $ in multiline regexps as an alternation with a positive

4906

// lookahead in one side and an end-of-input on the other side.

4907

// We need two registers for the lookahead.

4908

int stack_pointer_register = compiler->AllocateRegister();

4909

int position_register = compiler->AllocateRegister();

4910

// The ChoiceNode to distinguish between a newline and end-of-input.

4911

ChoiceNode* result = new(zone) ChoiceNode(2, zone);

4912

// Create a newline atom.

4913

ZoneList<CharacterRange>* newline_ranges =

4914

new(zone) ZoneList<CharacterRange>(3, zone);

4915

CharacterRange::AddClassEscape('n', newline_ranges, zone);

4916

RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n');

4917

TextNode* newline_matcher = new(zone) TextNode(

4918

newline_atom,

4919

ActionNode::PositiveSubmatchSuccess(stack_pointer_register,

4920

position_register,

4921

0, // No captures inside.

4922

-1, // Ignored if no captures.

4923

on_success));

4924

// Create an end-of-input matcher.

4925

RegExpNode* end_of_line = ActionNode::BeginSubmatch(

4926

stack_pointer_register,

4927

position_register,

4928

newline_matcher);

4929

// Add the two alternatives to the ChoiceNode.

4930

GuardedAlternative eol_alternative(end_of_line);

4931

result->AddAlternative(eol_alternative);

4932

GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));

4933

result->AddAlternative(end_alternative);

4934

return result;

4935

}

4936

default:

4937

UNREACHABLE();

4938

}

4939

return on_success;

4940

}

4941

4942

4943

RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,

4944

RegExpNode* on_success) {

4945

return new(compiler->zone())

4946

BackReferenceNode(RegExpCapture::StartRegister(index()),

4947

RegExpCapture::EndRegister(index()),

4948

on_success);

4949

}

4950

4951

4952

RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,

4953

RegExpNode* on_success) {

4954

return on_success;

4955

}

4956

4957

4958

RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,

4959

RegExpNode* on_success) {

4960

int stack_pointer_register = compiler->AllocateRegister();

4961

int position_register = compiler->AllocateRegister();

4962

4963

const int registers_per_capture = 2;

4964

const int register_of_first_capture = 2;

4965

int register_count = capture_count_ * registers_per_capture;

4966

int register_start =

4967

register_of_first_capture + capture_from_ * registers_per_capture;

4968

4969

RegExpNode* success;

4970

if (is_positive()) {

4971

RegExpNode* node = ActionNode::BeginSubmatch(

4972

stack_pointer_register,

4973

position_register,

4974

body()->ToNode(

4975

compiler,

4976

ActionNode::PositiveSubmatchSuccess(stack_pointer_register,

4977

position_register,

4978

register_count,

4979

register_start,

4980

on_success)));

4981

return node;

4982

} else {

4983

// We use a ChoiceNode for a negative lookahead because it has most of

4984

// the characteristics we need. It has the body of the lookahead as its

4985

// first alternative and the expression after the lookahead of the second

4986

// alternative. If the first alternative succeeds then the

4987

// NegativeSubmatchSuccess will unwind the stack including everything the

4988

// choice node set up and backtrack. If the first alternative fails then

4989

// the second alternative is tried, which is exactly the desired result

4990

// for a negative lookahead. The NegativeLookaheadChoiceNode is a special

4991

// ChoiceNode that knows to ignore the first exit when calculating quick

4992

// checks.

4993

Zone* zone = compiler->zone();

4994

4995

GuardedAlternative body_alt(

4996

body()->ToNode(

4997

compiler,

4998

success = new(zone) NegativeSubmatchSuccess(stack_pointer_register,

4999

position_register,

5000

register_count,

5001

register_start,

5002

zone)));

5003

ChoiceNode* choice_node =

5004

new(zone) NegativeLookaheadChoiceNode(body_alt,

5005

GuardedAlternative(on_success),

5006

zone);

5007

return ActionNode::BeginSubmatch(stack_pointer_register,

5008

position_register,

5009

choice_node);

5010

}

5011

}

5012

5013

5014

RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,

5015

RegExpNode* on_success) {

5016

return ToNode(body(), index(), compiler, on_success);

5017

}

5018

5019

5020

RegExpNode* RegExpCapture::ToNode(RegExpTree* body,

5021

int index,

5022

RegExpCompiler* compiler,

5023

RegExpNode* on_success) {

5024

int start_reg = RegExpCapture::StartRegister(index);

5025

int end_reg = RegExpCapture::EndRegister(index);

5026

RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);

5027

RegExpNode* body_node = body->ToNode(compiler, store_end);

5028

return ActionNode::StorePosition(start_reg, true, body_node);

5029

}

5030

5031

5032

RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,

5033

RegExpNode* on_success) {

5034

ZoneList<RegExpTree*>* children = nodes();

5035

RegExpNode* current = on_success;

5036

for (int i = children->length() - 1; i >= 0; i--) {

5037

current = children->at(i)->ToNode(compiler, current);

5038

}

5039

return current;

5040

}

5041

5042

5043

static void AddClass(const int* elmv,

5044

int elmc,

5045

ZoneList<CharacterRange>* ranges,

5046

Zone* zone) {

5047

elmc--;

5048

ASSERT(elmv[elmc] == 0x10000);

5049

for (int i = 0; i < elmc; i += 2) {

5050

ASSERT(elmv[i] < elmv[i + 1]);

5051

ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1), zone);

5052

}

5053

}

5054

5055

5056

static void AddClassNegated(const int *elmv,

5057

int elmc,

5058

ZoneList<CharacterRange>* ranges,

5059

Zone* zone) {

5060

elmc--;

5061

ASSERT(elmv[elmc] == 0x10000);

5062

ASSERT(elmv[0] != 0x0000);

5063

ASSERT(elmv[elmc-1] != String::kMaxUtf16CodeUnit);

5064

uc16 last = 0x0000;

5065

for (int i = 0; i < elmc; i += 2) {

5066

ASSERT(last <= elmv[i] - 1);

5067

ASSERT(elmv[i] < elmv[i + 1]);

5068

ranges->Add(CharacterRange(last, elmv[i] - 1), zone);

5069

last = elmv[i + 1];

5070

}

5071

ranges->Add(CharacterRange(last, String::kMaxUtf16CodeUnit), zone);

5072

}

5073

5074

5075

void CharacterRange::AddClassEscape(uc16 type,

5076

ZoneList<CharacterRange>* ranges,

5077

Zone* zone) {

5078

switch (type) {

5079

case 's':

5080

AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);

5081

break;

5082

case 'S':

5083

AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone);

5084

break;

5085

case 'w':

5086

AddClass(kWordRanges, kWordRangeCount, ranges, zone);

5087

break;

5088

case 'W':

5089

AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone);

5090

break;

5091

case 'd':

5092

AddClass(kDigitRanges, kDigitRangeCount, ranges, zone);

5093

break;

5094

case 'D':

5095

AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone);

5096

break;

5097

case '.':

5098

AddClassNegated(kLineTerminatorRanges,

5099

kLineTerminatorRangeCount,

5100

ranges,

5101

zone);

5102

break;

5103

// This is not a character range as defined by the spec but a

5104

// convenient shorthand for a character class that matches any

5105

// character.

5106

case '*':

5107

ranges->Add(CharacterRange::Everything(), zone);

5108

break;

5109

// This is the set of characters matched by the $ and ^ symbols

5110

// in multiline mode.

5111

case 'n':

5112

AddClass(kLineTerminatorRanges,

5113

kLineTerminatorRangeCount,

5114

ranges,

5115

zone);

5116

break;

5117

default:

5118

UNREACHABLE();

5119

}

5120

}

5121

5122

5123

Vector<const int> CharacterRange::GetWordBounds() {

5124

return Vector<const int>(kWordRanges, kWordRangeCount - 1);

5125

}

5126

5127

5128

class CharacterRangeSplitter {

5129

public:

5130

CharacterRangeSplitter(ZoneList<CharacterRange>** included,

5131

ZoneList<CharacterRange>** excluded,

5132

Zone* zone)

5133

: included_(included),

5134

excluded_(excluded),

5135

zone_(zone) { }

5136

void Call(uc16 from, DispatchTable::Entry entry);

5137

5138

static const int kInBase = 0;

5139

static const int kInOverlay = 1;

5140

5141

private:

5142

ZoneList<CharacterRange>** included_;

5143

ZoneList<CharacterRange>** excluded_;

5144

Zone* zone_;

5145

};

5146

5147

5148

void CharacterRangeSplitter::Call(uc16 from, DispatchTable::Entry entry) {

5149

if (!entry.out_set()->Get(kInBase)) return;

5150

ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay)

5151

? included_

5152

: excluded_;

5153

if (*target == NULL) *target = new(zone_) ZoneList<CharacterRange>(2, zone_);

5154

(*target)->Add(CharacterRange(entry.from(), entry.to()), zone_);

5155

}

5156

5157

5158

void CharacterRange::Split(ZoneList<CharacterRange>* base,

5159

Vector<const int> overlay,

5160

ZoneList<CharacterRange>** included,

5161

ZoneList<CharacterRange>** excluded,

5162

Zone* zone) {

5163

ASSERT_EQ(NULL, *included);

5164

ASSERT_EQ(NULL, *excluded);

5165

DispatchTable table(zone);

5166

for (int i = 0; i < base->length(); i++)

5167

table.AddRange(base->at(i), CharacterRangeSplitter::kInBase, zone);

5168

for (int i = 0; i < overlay.length(); i += 2) {

5169

table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1),

5170

CharacterRangeSplitter::kInOverlay, zone);

5171

}

5172

CharacterRangeSplitter callback(included, excluded, zone);

5173

table.ForEach(&callback);

5174

}

5175

5176

5177

void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,

5178

bool is_ascii,

5179

Zone* zone) {

5180

Isolate* isolate = Isolate::Current();

5181

uc16 bottom = from();

5182

uc16 top = to();

5183

if (is_ascii) {

5184

if (bottom > String::kMaxAsciiCharCode) return;

5185

if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;

5186

}

5187

unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

5188

if (top == bottom) {

5189

// If this is a singleton we just expand the one character.

5190

int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);

5191

for (int i = 0; i < length; i++) {

5192

uc32 chr = chars[i];

5193

if (chr != bottom) {

5194

ranges->Add(CharacterRange::Singleton(chars[i]), zone);

5195

}

5196

}

5197

} else {

5198

// If this is a range we expand the characters block by block,

5199

// expanding contiguous subranges (blocks) one at a time.

5200

// The approach is as follows. For a given start character we

5201

// look up the remainder of the block that contains it (represented

5202

// by the end point), for instance we find 'z' if the character

5203

// is 'c'. A block is characterized by the property

5204

// that all characters uncanonicalize in the same way, except that

5205

// each entry in the result is incremented by the distance from the first

5206

// element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and

5207

// the k'th letter uncanonicalizes to ['a' + k, 'A' + k].

5208

// Once we've found the end point we look up its uncanonicalization

5209

// and produce a range for each element. For instance for [c-f]

5210

// we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only

5211

// add a range if it is not already contained in the input, so [c-f]

5212

// will be skipped but [C-F] will be added. If this range is not

5213

// completely contained in a block we do this for all the blocks

5214

// covered by the range (handling characters that is not in a block

5215

// as a "singleton block").

5216

unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];

5217

int pos = bottom;

5218

while (pos <= top) {

5219

int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);

5220

uc16 block_end;

5221

if (length == 0) {

5222

block_end = pos;

5223

} else {

5224

ASSERT_EQ(1, length);

5225

block_end = range[0];

5226

}

5227

int end = (block_end > top) ? top : block_end;

5228

length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);

5229

for (int i = 0; i < length; i++) {

5230

uc32 c = range[i];

5231

uc16 range_from = c - (block_end - pos);

5232

uc16 range_to = c - (block_end - end);

5233

if (!(bottom <= range_from && range_to <= top)) {

5234

ranges->Add(CharacterRange(range_from, range_to), zone);

5235

}

5236

}

5237

pos = end + 1;

5238

}

5239

}

5240

}

5241

5242

5243

bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {

5244

ASSERT_NOT_NULL(ranges);

5245

int n = ranges->length();

5246

if (n <= 1) return true;

5247

int max = ranges->at(0).to();

5248

for (int i = 1; i < n; i++) {

5249

CharacterRange next_range = ranges->at(i);

5250

if (next_range.from() <= max + 1) return false;

5251

max = next_range.to();

5252

}

5253

return true;

5254

}

5255

5256

5257

ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {

5258

if (ranges_ == NULL) {

5259

ranges_ = new(zone) ZoneList<CharacterRange>(2, zone);

5260

CharacterRange::AddClassEscape(standard_set_type_, ranges_, zone);

5261

}

5262

return ranges_;

5263

}

5264

5265

5266

// Move a number of elements in a zonelist to another position

5267

// in the same list. Handles overlapping source and target areas.

5268

static void MoveRanges(ZoneList<CharacterRange>* list,

5269

int from,

5270

int to,

5271

int count) {

5272

// Ranges are potentially overlapping.

5273

if (from < to) {

5274

for (int i = count - 1; i >= 0; i--) {

5275

list->at(to + i) = list->at(from + i);

5276

}

5277

} else {

5278

for (int i = 0; i < count; i++) {

5279

list->at(to + i) = list->at(from + i);

5280

}

5281

}

5282

}

5283

5284

5285

static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,

5286

int count,

5287

CharacterRange insert) {

5288

// Inserts a range into list[0..count[, which must be sorted

5289

// by from value and non-overlapping and non-adjacent, using at most

5290

// list[0..count] for the result. Returns the number of resulting

5291

// canonicalized ranges. Inserting a range may collapse existing ranges into

5292

// fewer ranges, so the return value can be anything in the range 1..count+1.

5293

uc16 from = insert.from();

5294

uc16 to = insert.to();

5295

int start_pos = 0;

5296

int end_pos = count;

5297

for (int i = count - 1; i >= 0; i--) {

5298

CharacterRange current = list->at(i);

5299

if (current.from() > to + 1) {

5300

end_pos = i;

5301

} else if (current.to() + 1 < from) {

5302

start_pos = i + 1;

5303

break;

5304

}

5305

}

5306

5307

// Inserted range overlaps, or is adjacent to, ranges at positions

5308

// [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are

5309

// not affected by the insertion.

5310

// If start_pos == end_pos, the range must be inserted before start_pos.

5311

// if start_pos < end_pos, the entire range from start_pos to end_pos

5312

// must be merged with the insert range.

5313

5314

if (start_pos == end_pos) {

5315

// Insert between existing ranges at position start_pos.

5316

if (start_pos < count) {

5317

MoveRanges(list, start_pos, start_pos + 1, count - start_pos);

5318

}

5319

list->at(start_pos) = insert;

5320

return count + 1;

5321

}

5322

if (start_pos + 1 == end_pos) {

5323

// Replace single existing range at position start_pos.

5324

CharacterRange to_replace = list->at(start_pos);

5325

int new_from = Min(to_replace.from(), from);

5326

int new_to = Max(to_replace.to(), to);

5327

list->at(start_pos) = CharacterRange(new_from, new_to);

5328

return count;

5329

}

5330

// Replace a number of existing ranges from start_pos to end_pos - 1.

5331

// Move the remaining ranges down.

5332

5333

int new_from = Min(list->at(start_pos).from(), from);

5334

int new_to = Max(list->at(end_pos - 1).to(), to);

5335

if (end_pos < count) {

5336

MoveRanges(list, end_pos, start_pos + 1, count - end_pos);

5337

}

5338

list->at(start_pos) = CharacterRange(new_from, new_to);

5339

return count - (end_pos - start_pos) + 1;

5340

}

5341

5342

5343

void CharacterSet::Canonicalize() {

5344

// Special/default classes are always considered canonical. The result

5345

// of calling ranges() will be sorted.

5346

if (ranges_ == NULL) return;

5347

CharacterRange::Canonicalize(ranges_);

5348

}

5349

5350

5351

void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {

5352

if (character_ranges->length() <= 1) return;

5353

// Check whether ranges are already canonical (increasing, non-overlapping,

5354

// non-adjacent).

5355

int n = character_ranges->length();

5356

int max = character_ranges->at(0).to();

5357

int i = 1;

5358

while (i < n) {

5359

CharacterRange current = character_ranges->at(i);

5360

if (current.from() <= max + 1) {

5361

break;

5362

}

5363

max = current.to();

5364

i++;

5365

}

5366

// Canonical until the i'th range. If that's all of them, we are done.

5367

if (i == n) return;

5368

5369

// The ranges at index i and forward are not canonicalized. Make them so by

5370

// doing the equivalent of insertion sort (inserting each into the previous

5371

// list, in order).

5372

// Notice that inserting a range can reduce the number of ranges in the

5373

// result due to combining of adjacent and overlapping ranges.

5374

int read = i; // Range to insert.

5375

int num_canonical = i; // Length of canonicalized part of list.

5376

do {

5377

num_canonical = InsertRangeInCanonicalList(character_ranges,

5378

num_canonical,

5379

character_ranges->at(read));

5380

read++;

5381

} while (read < n);

5382

character_ranges->Rewind(num_canonical);

5383

5384

ASSERT(CharacterRange::IsCanonical(character_ranges));

5385

}

5386

5387

5388

void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,

5389

ZoneList<CharacterRange>* negated_ranges,

5390

Zone* zone) {

5391

ASSERT(CharacterRange::IsCanonical(ranges));

5392

ASSERT_EQ(0, negated_ranges->length());

5393

int range_count = ranges->length();

5394

uc16 from = 0;

5395

int i = 0;

5396

if (range_count > 0 && ranges->at(0).from() == 0) {

5397

from = ranges->at(0).to();

5398

i = 1;

5399

}

5400

while (i < range_count) {

5401

CharacterRange range = ranges->at(i);

5402

negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone);

5403

from = range.to();

5404

i++;

5405

}

5406

if (from < String::kMaxUtf16CodeUnit) {

5407

negated_ranges->Add(CharacterRange(from + 1, String::kMaxUtf16CodeUnit),

5408

zone);

5409

}

5410

}

5411

5412

5413

// -------------------------------------------------------------------

5414

// Splay tree

5415

5416

5417

OutSet* OutSet::Extend(unsigned value, Zone* zone) {

5418

if (Get(value))

5419

return this;

5420

if (successors(zone) != NULL) {

5421

for (int i = 0; i < successors(zone)->length(); i++) {

5422

OutSet* successor = successors(zone)->at(i);

5423

if (successor->Get(value))

5424

return successor;

5425

}

5426

} else {

5427

successors_ = new(zone) ZoneList<OutSet*>(2, zone);

5428

}

5429

OutSet* result = new(zone) OutSet(first_, remaining_);

5430

result->Set(value, zone);

5431

successors(zone)->Add(result, zone);

5432

return result;

5433

}

5434

5435

5436

void OutSet::Set(unsigned value, Zone *zone) {

5437

if (value < kFirstLimit) {

5438

first_ |= (1 << value);

5439

} else {

5440

if (remaining_ == NULL)

5441

remaining_ = new(zone) ZoneList<unsigned>(1, zone);

5442

if (remaining_->is_empty() || !remaining_->Contains(value))

5443

remaining_->Add(value, zone);

5444

}

5445

}

5446

5447

5448

bool OutSet::Get(unsigned value) {

5449

if (value < kFirstLimit) {

5450

return (first_ & (1 << value)) != 0;

5451

} else if (remaining_ == NULL) {

5452

return false;

5453

} else {

5454

return remaining_->Contains(value);

5455

}

5456

}

5457

5458

5459

const uc16 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar;

5460

5461

5462

void DispatchTable::AddRange(CharacterRange full_range, int value,

5463

Zone* zone) {

5464

CharacterRange current = full_range;

5465

if (tree()->is_empty()) {

5466

// If this is the first range we just insert into the table.

5467

ZoneSplayTree<Config>::Locator loc;

5468

ASSERT_RESULT(tree()->Insert(current.from(), &loc));

5469

loc.set_value(Entry(current.from(), current.to(),

5470

empty()->Extend(value, zone)));

5471

return;

5472

}

5473

// First see if there is a range to the left of this one that

5474

// overlaps.

5475

ZoneSplayTree<Config>::Locator loc;

5476

if (tree()->FindGreatestLessThan(current.from(), &loc)) {

5477

Entry* entry = &loc.value();

5478

// If we've found a range that overlaps with this one, and it

5479

// starts strictly to the left of this one, we have to fix it

5480

// because the following code only handles ranges that start on

5481

// or after the start point of the range we're adding.

5482

if (entry->from() < current.from() && entry->to() >= current.from()) {

5483

// Snap the overlapping range in half around the start point of

5484

// the range we're adding.

5485

CharacterRange left(entry->from(), current.from() - 1);

5486

CharacterRange right(current.from(), entry->to());

5487

// The left part of the overlapping range doesn't overlap.

5488

// Truncate the whole entry to be just the left part.

5489

entry->set_to(left.to());

5490

// The right part is the one that overlaps. We add this part

5491

// to the map and let the next step deal with merging it with

5492

// the range we're adding.

5493

ZoneSplayTree<Config>::Locator loc;

5494

ASSERT_RESULT(tree()->Insert(right.from(), &loc));

5495

loc.set_value(Entry(right.from(),

5496

right.to(),

5497

entry->out_set()));

5498

}

5499

}

5500

while (current.is_valid()) {

5501

if (tree()->FindLeastGreaterThan(current.from(), &loc) &&

5502

(loc.value().from() <= current.to()) &&

5503

(loc.value().to() >= current.from())) {

5504

Entry* entry = &loc.value();

5505

// We have overlap. If there is space between the start point of

5506

// the range we're adding and where the overlapping range starts

5507

// then we have to add a range covering just that space.

5508

if (current.from() < entry->from()) {

5509

ZoneSplayTree<Config>::Locator ins;

5510

ASSERT_RESULT(tree()->Insert(current.from(), &ins));

5511

ins.set_value(Entry(current.from(),

5512

entry->from() - 1,

5513

empty()->Extend(value, zone)));

5514

current.set_from(entry->from());

5515

}

5516

ASSERT_EQ(current.from(), entry->from());

5517

// If the overlapping range extends beyond the one we want to add

5518

// we have to snap the right part off and add it separately.

5519

if (entry->to() > current.to()) {

5520

ZoneSplayTree<Config>::Locator ins;

5521

ASSERT_RESULT(tree()->Insert(current.to() + 1, &ins));

5522

ins.set_value(Entry(current.to() + 1,

5523

entry->to(),

5524

entry->out_set()));

5525

entry->set_to(current.to());

5526

}

5527

ASSERT(entry->to() <= current.to());

5528

// The overlapping range is now completely contained by the range

5529

// we're adding so we can just update it and move the start point

5530

// of the range we're adding just past it.

5531

entry->AddValue(value, zone);

5532

// Bail out if the last interval ended at 0xFFFF since otherwise

5533

// adding 1 will wrap around to 0.

5534

if (entry->to() == String::kMaxUtf16CodeUnit)

5535

break;

5536

ASSERT(entry->to() + 1 > current.from());

5537

current.set_from(entry->to() + 1);

5538

} else {

5539

// There is no overlap so we can just add the range

5540

ZoneSplayTree<Config>::Locator ins;

5541

ASSERT_RESULT(tree()->Insert(current.from(), &ins));

5542

ins.set_value(Entry(current.from(),

5543

current.to(),

5544

empty()->Extend(value, zone)));

5545

break;

5546

}

5547

}

5548

}

5549

5550

5551

OutSet* DispatchTable::Get(uc16 value) {

5552

ZoneSplayTree<Config>::Locator loc;

5553

if (!tree()->FindGreatestLessThan(value, &loc))

5554

return empty();

5555

Entry* entry = &loc.value();

5556

if (value <= entry->to())

5557

return entry->out_set();

5558

else

5559

return empty();

5560

}

5561

5562

5563

// -------------------------------------------------------------------

5564

// Analysis

5565

5566

5567

void Analysis::EnsureAnalyzed(RegExpNode* that) {

5568

StackLimitCheck check(Isolate::Current());

5569

if (check.HasOverflowed()) {

5570

fail("Stack overflow");

5571

return;

5572

}

5573

if (that->info()->been_analyzed || that->info()->being_analyzed)

5574

return;

5575

that->info()->being_analyzed = true;

5576

that->Accept(this);

5577

that->info()->being_analyzed = false;

5578

that->info()->been_analyzed = true;

5579

}

5580

5581

5582

void Analysis::VisitEnd(EndNode* that) {

5583

// nothing to do

5584

}

5585

5586

5587

void TextNode::CalculateOffsets() {

5588

int element_count = elements()->length();

5589

// Set up the offsets of the elements relative to the start. This is a fixed

5590

// quantity since a TextNode can only contain fixed-width things.

5591

int cp_offset = 0;

5592

for (int i = 0; i < element_count; i++) {

5593

TextElement& elm = elements()->at(i);

5594

elm.cp_offset = cp_offset;

5595

if (elm.type == TextElement::ATOM) {

5596

cp_offset += elm.data.u_atom->data().length();

5597

} else {

5598

cp_offset++;

5599

}

5600

}

5601

}

5602

5603

5604

void Analysis::VisitText(TextNode* that) {

5605

if (ignore_case_) {

5606

that->MakeCaseIndependent(is_ascii_);

5607

}

5608

EnsureAnalyzed(that->on_success());

5609

if (!has_failed()) {

5610

that->CalculateOffsets();

5611

}

5612

}

5613

5614

5615

void Analysis::VisitAction(ActionNode* that) {

5616

RegExpNode* target = that->on_success();

5617

EnsureAnalyzed(target);

5618

if (!has_failed()) {

5619

// If the next node is interested in what it follows then this node

5620

// has to be interested too so it can pass the information on.

5621

that->info()->AddFromFollowing(target->info());

5622

}

5623

}

5624

5625

5626

void Analysis::VisitChoice(ChoiceNode* that) {

5627

NodeInfo* info = that->info();

5628

for (int i = 0; i < that->alternatives()->length(); i++) {

5629

RegExpNode* node = that->alternatives()->at(i).node();

5630

EnsureAnalyzed(node);

5631

if (has_failed()) return;

5632

// Anything the following nodes need to know has to be known by

5633

// this node also, so it can pass it on.

5634

info->AddFromFollowing(node->info());

5635

}

5636

}

5637

5638

5639

void Analysis::VisitLoopChoice(LoopChoiceNode* that) {

5640

NodeInfo* info = that->info();

5641

for (int i = 0; i < that->alternatives()->length(); i++) {

5642

RegExpNode* node = that->alternatives()->at(i).node();

5643

if (node != that->loop_node()) {

5644

EnsureAnalyzed(node);

5645

if (has_failed()) return;

5646

info->AddFromFollowing(node->info());

5647

}

5648

}

5649

// Check the loop last since it may need the value of this node

5650

// to get a correct result.

5651

EnsureAnalyzed(that->loop_node());

5652

if (!has_failed()) {

5653

info->AddFromFollowing(that->loop_node()->info());

5654

}

5655

}

5656

5657

5658

void Analysis::VisitBackReference(BackReferenceNode* that) {

5659

EnsureAnalyzed(that->on_success());

5660

}

5661

5662

5663

void Analysis::VisitAssertion(AssertionNode* that) {

5664

EnsureAnalyzed(that->on_success());

5665

}

5666

5667

5668

void BackReferenceNode::FillInBMInfo(int offset,

5669

int recursion_depth,

5670

int budget,

5671

BoyerMooreLookahead* bm,

5672

bool not_at_start) {

5673

// Working out the set of characters that a backreference can match is too

5674

// hard, so we just say that any character can match.

5675

bm->SetRest(offset);

5676

SaveBMInfo(bm, not_at_start, offset);

5677

}

5678

5679

5680

STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==

5681

RegExpMacroAssembler::kTableSize);

5682

5683

5684

void ChoiceNode::FillInBMInfo(int offset,

5685

int recursion_depth,

5686

int budget,

5687

BoyerMooreLookahead* bm,

5688

bool not_at_start) {

5689

ZoneList<GuardedAlternative>* alts = alternatives();

5690

budget = (budget - 1) / alts->length();

5691

for (int i = 0; i < alts->length(); i++) {

5692

GuardedAlternative& alt = alts->at(i);

5693

if (alt.guards() != NULL && alt.guards()->length() != 0) {

5694

bm->SetRest(offset); // Give up trying to fill in info.

5695

SaveBMInfo(bm, not_at_start, offset);

5696

return;

5697

}

5698

alt.node()->FillInBMInfo(

5699

offset, recursion_depth + 1, budget, bm, not_at_start);

5700

}

5701

SaveBMInfo(bm, not_at_start, offset);

5702

}

5703

5704

5705

void TextNode::FillInBMInfo(int initial_offset,

5706

int recursion_depth,

5707

int budget,

5708

BoyerMooreLookahead* bm,

5709

bool not_at_start) {

5710

if (initial_offset >= bm->length()) return;

5711

int offset = initial_offset;

5712

int max_char = bm->max_char();

5713

for (int i = 0; i < elements()->length(); i++) {

5714

if (offset >= bm->length()) {

5715

if (initial_offset == 0) set_bm_info(not_at_start, bm);

5716

return;

5717

}

5718

TextElement text = elements()->at(i);

5719

if (text.type == TextElement::ATOM) {

5720

RegExpAtom* atom = text.data.u_atom;

5721

for (int j = 0; j < atom->length(); j++, offset++) {

5722

if (offset >= bm->length()) {

5723

if (initial_offset == 0) set_bm_info(not_at_start, bm);

5724

return;

5725

}

5726

uc16 character = atom->data()[j];

5727

if (bm->compiler()->ignore_case()) {

5728

unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

5729

int length = GetCaseIndependentLetters(

5730

ISOLATE,

5731

character,

5732

bm->max_char() == String::kMaxAsciiCharCode,

5733

chars);

5734

for (int j = 0; j < length; j++) {

5735

bm->Set(offset, chars[j]);

5736

}

5737

} else {

5738

if (character <= max_char) bm->Set(offset, character);

5739

}

5740

}

5741

} else {

5742

ASSERT(text.type == TextElement::CHAR_CLASS);

5743

RegExpCharacterClass* char_class = text.data.u_char_class;

5744

ZoneList<CharacterRange>* ranges = char_class->ranges(zone());

5745

if (char_class->is_negated()) {

5746

bm->SetAll(offset);

5747

} else {

5748

for (int k = 0; k < ranges->length(); k++) {

5749

CharacterRange& range = ranges->at(k);

5750

if (range.from() > max_char) continue;

5751

int to = Min(max_char, static_cast<int>(range.to()));

5752

bm->SetInterval(offset, Interval(range.from(), to));

5753

}

5754

}

5755

offset++;

5756

}

5757

}

5758

if (offset >= bm->length()) {

5759

if (initial_offset == 0) set_bm_info(not_at_start, bm);

5760

return;

5761

}

5762

on_success()->FillInBMInfo(offset,

5763

recursion_depth + 1,

5764

budget - 1,

5765

bm,

5766

true); // Not at start after a text node.

5767

if (initial_offset == 0) set_bm_info(not_at_start, bm);

5768

}

5769

5770

5771

// -------------------------------------------------------------------

5772

// Dispatch table construction

5773

5774

5775

void DispatchTableConstructor::VisitEnd(EndNode* that) {

5776

AddRange(CharacterRange::Everything());

5777

}

5778

5779

5780

void DispatchTableConstructor::BuildTable(ChoiceNode* node) {

5781

node->set_being_calculated(true);

5782

ZoneList<GuardedAlternative>* alternatives = node->alternatives();

5783

for (int i = 0; i < alternatives->length(); i++) {

5784

set_choice_index(i);

5785

alternatives->at(i).node()->Accept(this);

5786

}

5787

node->set_being_calculated(false);

5788

}

5789

5790

5791

class AddDispatchRange {

5792

public:

5793

explicit AddDispatchRange(DispatchTableConstructor* constructor)

5794

: constructor_(constructor) { }

5795

void Call(uc32 from, DispatchTable::Entry entry);

5796

private:

5797

DispatchTableConstructor* constructor_;

5798

};

5799

5800

5801

void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) {

5802

CharacterRange range(from, entry.to());

5803

constructor_->AddRange(range);

5804

}

5805

5806

5807

void DispatchTableConstructor::VisitChoice(ChoiceNode* node) {

5808

if (node->being_calculated())

5809

return;

5810

DispatchTable* table = node->GetTable(ignore_case_);

5811

AddDispatchRange adder(this);

5812

table->ForEach(&adder);

5813

}

5814

5815

5816

void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {

5817

// TODO(160): Find the node that we refer back to and propagate its start

5818

// set back to here. For now we just accept anything.

5819

AddRange(CharacterRange::Everything());

5820

}

5821

5822

5823

void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {

5824

RegExpNode* target = that->on_success();

5825

target->Accept(this);

5826

}

5827

5828

5829

static int CompareRangeByFrom(const CharacterRange* a,

5830

const CharacterRange* b) {

5831

return Compare<uc16>(a->from(), b->from());

5832

}

5833

5834

5835

void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {

5836

ranges->Sort(CompareRangeByFrom);

5837

uc16 last = 0;

5838

for (int i = 0; i < ranges->length(); i++) {

5839

CharacterRange range = ranges->at(i);

5840

if (last < range.from())

5841

AddRange(CharacterRange(last, range.from() - 1));

5842

if (range.to() >= last) {

5843

if (range.to() == String::kMaxUtf16CodeUnit) {

5844

return;

5845

} else {

5846

last = range.to() + 1;

5847

}

5848

}

5849

}

5850

AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit));

5851

}

5852

5853

5854

void DispatchTableConstructor::VisitText(TextNode* that) {

5855

TextElement elm = that->elements()->at(0);

5856

switch (elm.type) {

5857

case TextElement::ATOM: {

5858

uc16 c = elm.data.u_atom->data()[0];

5859

AddRange(CharacterRange(c, c));

5860

break;

5861

}

5862

case TextElement::CHAR_CLASS: {

5863

RegExpCharacterClass* tree = elm.data.u_char_class;

5864

ZoneList<CharacterRange>* ranges = tree->ranges(that->zone());

5865

if (tree->is_negated()) {

5866

AddInverse(ranges);

5867

} else {

5868

for (int i = 0; i < ranges->length(); i++)

5869

AddRange(ranges->at(i));

5870

}

5871

break;

5872

}

5873

default: {

5874

UNIMPLEMENTED();

5875

}

5876

}

5877

}

5878

5879

5880

void DispatchTableConstructor::VisitAction(ActionNode* that) {

5881

RegExpNode* target = that->on_success();

5882

target->Accept(this);

5883

}

5884

5885

5886

RegExpEngine::CompilationResult RegExpEngine::Compile(

5887

RegExpCompileData* data,

5888

bool ignore_case,

5889

bool is_global,

5890

bool is_multiline,

5891

Handle<String> pattern,

5892

Handle<String> sample_subject,

5893

bool is_ascii,

5894

Zone* zone) {

5895

if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {

5896

return IrregexpRegExpTooBig();

5897

}

5898

RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii, zone);

5899

5900

// Sample some characters from the middle of the string.

5901

static const int kSampleSize = 128;

5902

5903

FlattenString(sample_subject);

5904

int chars_sampled = 0;

5905

int half_way = (sample_subject->length() - kSampleSize) / 2;

5906

for (int i = Max(0, half_way);

5907

i < sample_subject->length() && chars_sampled < kSampleSize;

5908

i++, chars_sampled++) {

5909

compiler.frequency_collator()->CountCharacter(sample_subject->Get(i));

5910

}

5911

5912

// Wrap the body of the regexp in capture #0.

5913

RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,

5914

5915

&compiler,

5916

compiler.accept());

5917

RegExpNode* node = captured_body;

5918

bool is_end_anchored = data->tree->IsAnchoredAtEnd();

5919

bool is_start_anchored = data->tree->IsAnchoredAtStart();

5920

int max_length = data->tree->max_match();

5921

if (!is_start_anchored) {

5922

// Add a .*? at the beginning, outside the body capture, unless

5923

// this expression is anchored at the beginning.

5924

RegExpNode* loop_node =

5925

RegExpQuantifier::ToNode(0,

5926

RegExpTree::kInfinity,

5927

false,

5928

new(zone) RegExpCharacterClass('*'),

5929

&compiler,

5930

captured_body,

5931

data->contains_anchor);

5932

5933

if (data->contains_anchor) {

5934

// Unroll loop once, to take care of the case that might start

5935

// at the start of input.

5936

ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);

5937

first_step_node->AddAlternative(GuardedAlternative(captured_body));

5938

first_step_node->AddAlternative(GuardedAlternative(

5939

new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node)));

5940

node = first_step_node;

5941

} else {

5942

node = loop_node;

5943

}

5944

}

5945

if (is_ascii) {

5946

node = node->FilterASCII(RegExpCompiler::kMaxRecursion);

5947

// Do it again to propagate the new nodes to places where they were not

5948

// put because they had not been calculated yet.

5949

if (node != NULL) node = node->FilterASCII(RegExpCompiler::kMaxRecursion);

5950

}

5951

5952

if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);

5953

data->node = node;

5954

Analysis analysis(ignore_case, is_ascii);

5955

analysis.EnsureAnalyzed(node);

5956

if (analysis.has_failed()) {

5957

const char* error_message = analysis.error_message();

5958

return CompilationResult(error_message);

5959

}

5960

5961

// Create the correct assembler for the architecture.

5962

#ifndef V8_INTERPRETED_REGEXP

5963

// Native regexp implementation.

5964

5965

NativeRegExpMacroAssembler::Mode mode =

5966

is_ascii ? NativeRegExpMacroAssembler::ASCII

5967

: NativeRegExpMacroAssembler::UC16;

5968

5969

#if V8_TARGET_ARCH_IA32

5970

RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2,

5971

zone);

5972

#elif V8_TARGET_ARCH_X64

5973

RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2,

5974

zone);

5975

#elif V8_TARGET_ARCH_ARM

5976

RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2,

5977

zone);

5978

#elif V8_TARGET_ARCH_MIPS

5979

RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2,

5980

zone);

5981

#endif

5982

5983

#else // V8_INTERPRETED_REGEXP

5984

// Interpreted regexp implementation.

5985

EmbeddedVector<byte, 1024> codes;

5986

RegExpMacroAssemblerIrregexp macro_assembler(codes, zone);

5987

#endif // V8_INTERPRETED_REGEXP

5988

5989

// Inserted here, instead of in Assembler, because it depends on information

5990

// in the AST that isn't replicated in the Node structure.

5991

static const int kMaxBacksearchLimit = 1024;

5992

if (is_end_anchored &&

5993

!is_start_anchored &&

5994

max_length < kMaxBacksearchLimit) {

5995

macro_assembler.SetCurrentPositionFromEnd(max_length);

5996

}

5997

5998

if (is_global) {

5999

macro_assembler.set_global_mode(

6000

(data->tree->min_match() > 0)

6001

? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK

6002

: RegExpMacroAssembler::GLOBAL);

6003

}

6004

6005

return compiler.Assemble(&macro_assembler,

6006

node,

6007

data->capture_count,

6008

pattern);

6009

}

6010

6011

6012

}} // namespace v8::internal

Older »