1
// Copyright 2011 the V8 project authors. All rights reserved.
2
// Redistribution and use in source and binary forms, with or without
3
// modification, are permitted provided that the following conditions are
6
// * Redistributions of source code must retain the above copyright
7
// notice, this list of conditions and the following disclaimer.
8
// * Redistributions in binary form must reproduce the above
9
// copyright notice, this list of conditions and the following
10
// disclaimer in the documentation and/or other materials provided
11
// with the distribution.
12
// * Neither the name of Google Inc. nor the names of its
13
// contributors may be used to endorse or promote products derived
14
// from this software without specific prior written permission.
16
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
// TODO(cira): Remove LanguageMatcher from v8 when ICU implements
29
// language matching API.
31
#include "src/extensions/experimental/language-matcher.h"
35
#include "src/extensions/experimental/i18n-utils.h"
36
#include "unicode/datefmt.h" // For getAvailableLocales
37
#include "unicode/locid.h"
38
#include "unicode/uloc.h"
43
// Scoring constants for LanguageMatcher.
// The three weights are each multiplied by a +1/-1 subtag comparison result
// and summed into a candidate's score in CompareToSupportedLocaleIDList.
// Weight of the language subtag comparison.
const unsigned int LanguageMatcher::kLanguageWeight = 75;
44
// Weight of the script subtag comparison.
const unsigned int LanguageMatcher::kScriptWeight = 20;
45
// Weight of the region subtag comparison.
const unsigned int LanguageMatcher::kRegionWeight = 5;
46
// Minimum score a candidate must reach to be considered a match.
const unsigned int LanguageMatcher::kThreshold = 50;
47
// Per-position step of the bonus used by GetBestMatchForPriorityList; the
// bonus shrinks by this amount for every further entry in the priority list.
const unsigned int LanguageMatcher::kPositionBonus = 1;
48
// Locale id passed as the default when LocaleIDMatch is constructed.
const char* const LanguageMatcher::kDefaultLocale = "root";
50
// Forward declarations of the file-local helpers defined further down in
// this file.
static const char* GetLanguageException(const char*);
51
static bool BCP47ToICUFormat(const char*, char*);
52
static int CompareLocaleSubtags(const char*, const char*);
53
static bool BuildLocaleName(const char*, const char*, LocaleIDMatch*);
55
// Default constructor.
// NOTE(review): the member-initializer list and the heads of the two calls
// whose argument lists appear below are not visible in this view. Presumably
// both id buffers are filled with LanguageMatcher::kDefaultLocale, bounded by
// ULOC_FULLNAME_CAPACITY -- confirm against the full file.
LocaleIDMatch::LocaleIDMatch()
58
bcp47_id, ULOC_FULLNAME_CAPACITY, LanguageMatcher::kDefaultLocale);
61
icu_id, ULOC_FULLNAME_CAPACITY, LanguageMatcher::kDefaultLocale);
64
// Copy assignment: copies both locale id buffers and the score from rhs.
// Each buffer copy is bounded by ULOC_FULLNAME_CAPACITY.
// NOTE(review): the end of the function (return statement, closing brace) is
// not visible in this view.
LocaleIDMatch& LocaleIDMatch::operator=(const LocaleIDMatch& rhs) {
65
// BCP47 form of the locale id.
I18NUtils::StrNCopy(this->bcp47_id, ULOC_FULLNAME_CAPACITY, rhs.bcp47_id);
66
// ICU form of the locale id.
I18NUtils::StrNCopy(this->icu_id, ULOC_FULLNAME_CAPACITY, rhs.icu_id);
67
this->score = rhs.score;
73
// Walks a JavaScript array of locale ids in priority order and tracks the
// highest-scoring supported match.
//
// locales: JavaScript array; entries that are not strings are skipped.
// result:  output parameter for the best match.
//
// NOTE(review): several lines of this function are not visible in this view,
// including the declarations of `match` and `max_score`, the body of the
// failed-comparison branch, the store into `result`, and the closing braces.
// Confirm details against the full file.
void LanguageMatcher::GetBestMatchForPriorityList(
74
v8::Handle<v8::Array> locales, LocaleIDMatch* result) {
75
v8::HandleScope handle_scope;
77
// Entries earlier in the list receive a larger bonus: the bonus starts at
// Length() * kPositionBonus and drops by kPositionBonus on every iteration.
unsigned int position_bonus = locales->Length() * kPositionBonus;
81
for (unsigned int i = 0; i < locales->Length(); ++i) {
82
// Decremented before use, so the first entry gets (Length() - 1) steps.
position_bonus -= kPositionBonus;
84
v8::TryCatch try_catch;
85
v8::Local<v8::Value> locale_id = locales->Get(v8::Integer::New(i));
87
// Return default if exception is raised when reading parameter.
88
if (try_catch.HasCaught()) break;
90
// JavaScript arrays can be heterogeneous, so check that each item is a
// string before using it.
92
if (!locale_id->IsString()) continue;
94
if (!CompareToSupportedLocaleIDList(locale_id->ToString(), &match)) {
98
// Skip items under threshold.
99
if (match.score < kThreshold) continue;
101
// Apply the position bonus only after the threshold check, so the bonus
// cannot lift a below-threshold match over the bar.
match.score += position_bonus;
102
if (match.score > max_score) {
105
max_score = match.score;
111
// Finds the best supported match for a single locale id string.
// The match is accepted only when CompareToSupportedLocaleIDList succeeds
// and the resulting score is at least kThreshold.
// NOTE(review): the declaration of `match`, the code that fills `result`, and
// the closing braces are not visible in this view.
void LanguageMatcher::GetBestMatchForString(
112
v8::Handle<v8::String> locale, LocaleIDMatch* result) {
115
if (CompareToSupportedLocaleIDList(locale, &match) &&
116
match.score >= kThreshold) {
122
// Scores one BCP47 locale id against every locale returned by
// icu::DateFormat::getAvailableLocales() and, when a good enough candidate
// exists, writes the winning locale name into `result` via BuildLocaleName.
//
// locale_id: locale id as a JavaScript string.
// result:    receives the best score seen so far and, on success, the ids
//            built by BuildLocaleName.
//
// Returns false when the id has no ASCII value, cannot be converted by
// BCP47ToICUFormat, or when no candidate was selected (position == -1);
// otherwise returns the result of BuildLocaleName.
//
// NOTE(review): some lines of this function are not visible in this view
// (the declaration of `position`, the declaration of `sign`, the update of
// `position` inside the loop, and the closing braces).
bool LanguageMatcher::CompareToSupportedLocaleIDList(
123
v8::Handle<v8::String> locale_id, LocaleIDMatch* result) {
124
static int32_t available_count = 0;
125
// Depending on how ICU data is built, locales returned by
126
// Locale::getAvailableLocale() are not guaranteed to support DateFormat,
127
// Collation and other services. We can call getAvailableLocale() of all the
128
// services we want to support and take the intersection of them all, but
129
// using DateFormat::getAvailableLocales() should suffice.
130
// TODO(cira): Maybe make this thread-safe?
131
// Function-local statics: the ICU locale list is fetched once and reused on
// later calls.
static const icu::Locale* available_locales =
132
icu::DateFormat::getAvailableLocales(available_count);
134
// Skip this locale_id if it's not in ASCII.
135
static LocaleIDMatch default_match;
136
v8::String::AsciiValue ascii_value(locale_id);
137
if (*ascii_value == NULL) return false;
139
// Convert the BCP47 tag into ICU form so it can be parsed by icu::Locale.
char locale[ULOC_FULLNAME_CAPACITY];
140
if (!BCP47ToICUFormat(*ascii_value, locale)) return false;
142
icu::Locale input_locale(locale);
144
// Position of the best match locale in list of available locales.
146
// Some unsupported language subtags are remapped to a related supported
// language before scoring.
const char* language = GetLanguageException(input_locale.getLanguage());
147
const char* script = input_locale.getScript();
148
const char* region = input_locale.getCountry();
149
for (int32_t i = 0; i < available_count; ++i) {
150
// Each subtag contributes +weight on a match and -weight on a mismatch.
int current_score = 0;
152
CompareLocaleSubtags(language, available_locales[i].getLanguage());
153
current_score += sign * kLanguageWeight;
155
sign = CompareLocaleSubtags(script, available_locales[i].getScript());
156
current_score += sign * kScriptWeight;
158
sign = CompareLocaleSubtags(region, available_locales[i].getCountry());
159
current_score += sign * kRegionWeight;
161
// NOTE(review): current_score is a signed int while kThreshold is unsigned
// int, so the first comparison is performed in unsigned arithmetic and a
// negative current_score passes it. The second comparison appears to be what
// rejects such candidates -- confirm the type and initial value of
// result->score.
if (current_score >= kThreshold && current_score > result->score) {
162
result->score = current_score;
167
// Didn't find any good matches so use defaults.
168
if (position == -1) return false;
170
// Combine the matched locale's base name with the full name of the input
// locale (see BuildLocaleName).
return BuildLocaleName(available_locales[position].getBaseName(),
171
input_locale.getName(), result);
174
// For some unsupported language subtags it is better to fallback to related
// language that is supported than to default.
//
// language: NUL-terminated language subtag; must not be NULL.
//
// Returns the replacement subtag (a string literal) for the known exceptions,
// or `language` itself, unchanged, for every other input.
static const char* GetLanguageException(const char* language) {
  // Serbo-Croatian to Serbian.
  if (!strcmp(language, "sh")) return "sr";

  // Norwegian to Norwegian Bokmal.
  if (!strcmp(language, "no")) return "nb";

  // Moldavian to Romanian.
  if (!strcmp(language, "mo")) return "ro";

  // Tagalog to Filipino.
  if (!strcmp(language, "tl")) return "fil";

  // No exception applies: keep the caller's subtag.
  return language;
}
192
// Converts user input from BCP47 locale id format to ICU compatible format.
193
// Returns false if uloc_forLanguageTag call fails or if extension is too long.
194
static bool BCP47ToICUFormat(const char* locale_id, char* result) {
195
UErrorCode status = U_ZERO_ERROR;
196
int32_t locale_size = 0;
198
char locale[ULOC_FULLNAME_CAPACITY];
199
I18NUtils::StrNCopy(locale, ULOC_FULLNAME_CAPACITY, locale_id);
201
// uloc_forLanguageTag has a bug where long extension can crash the code.
202
// We need to check if extension part of language id conforms to the length.
203
// ICU bug: http://bugs.icu-project.org/trac/ticket/8519
204
const char* extension = strstr(locale_id, "-u-");
205
if (extension != NULL &&
206
strlen(extension) > ULOC_KEYWORD_AND_VALUES_CAPACITY) {
207
// Truncate to get non-crashing string, but still preserve base language.
208
int base_length = strlen(locale_id) - strlen(extension);
209
locale[base_length] = '\0';
212
uloc_forLanguageTag(locale, result, ULOC_FULLNAME_CAPACITY,
213
&locale_size, &status);
214
return !U_FAILURE(status);
217
// Compares locale id subtags for exact, case-sensitive equality.
//
// lsubtag, rsubtag: NUL-terminated subtags; neither may be NULL.
//
// Returns 1 for match or -1 for mismatch, so the result can be used directly
// as a sign multiplier for a score weight.
static int CompareLocaleSubtags(const char* lsubtag, const char* rsubtag) {
  return strcmp(lsubtag, rsubtag) == 0 ? 1 : -1;
}
223
// Builds a BCP47 compliant locale id from base name of matched locale and
224
// full user specified locale.
225
// Returns false if uloc_toLanguageTag failed to convert locale id.
227
// base_name of matched locale (ICU ID): de_DE
228
// input_locale_name (ICU ID): de_AT@collation=phonebk
229
// result (ICU ID): de_DE@collation=phonebk
230
// result (BCP47 ID): de-DE-u-co-phonebk
231
static bool BuildLocaleName(const char* base_name,
232
const char* input_locale_name,
233
LocaleIDMatch* result) {
234
I18NUtils::StrNCopy(result->icu_id, ULOC_LANG_CAPACITY, base_name);
236
// Get extensions (if any) from the original locale.
237
const char* extension = strchr(input_locale_name, ULOC_KEYWORD_SEPARATOR);
238
if (extension != NULL) {
239
I18NUtils::StrNCopy(result->icu_id + strlen(base_name),
240
ULOC_KEYWORD_AND_VALUES_CAPACITY, extension);
242
I18NUtils::StrNCopy(result->icu_id, ULOC_LANG_CAPACITY, base_name);
245
// Convert ICU locale name into BCP47 format.
246
UErrorCode status = U_ZERO_ERROR;
247
uloc_toLanguageTag(result->icu_id, result->bcp47_id,
248
ULOC_FULLNAME_CAPACITY, false, &status);
249
return !U_FAILURE(status);
252
} } // namespace v8::internal