blob: 1053291e54c06e4213b7886ec664baaa2d5a8e39 [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file defines a helper class for selecting a supported language from a
// set of candidates.
#include "chrome/installer/util/language_selector.h"
#include <stddef.h>
#include <algorithm>
#include <functional>
#include "base/logging.h"
#include "base/macros.h"
#include "base/strings/string16.h"
#include "base/strings/string_util.h"
#include "base/win/i18n.h"
#include "chrome/installer/util/google_update_settings.h"
#include "chrome/installer/util/installer_util_strings.h"
namespace {
// Pairs a language identifier with the offset of its translation. This is the
// element type of the static lookup tables below and is also used as the
// search key in LanguageSelector::GetLanguageName().
struct LangToOffset {
// Language identifier (e.g., L"en-us"). Lookups compare it against the
// ASCII-lowercased candidate names produced by LanguageSelector::SelectIf().
const wchar_t* language;
// Offset of the translation for |language| (an IDS_L10N_OFFSET_* value in the
// alias tables below).
int offset;
};
// The language we fall back upon when all else fails: DoSelect() selects this
// name/offset pair when no candidate matches, and GetLanguageName() returns
// the name when handed an offset it cannot find.
const wchar_t kFallbackLanguage[] = L"en-us";
const int kFallbackLanguageOffset = IDS_L10N_OFFSET_EN_US;
// A sorted array of language identifiers (and their offsets) for which
// translations are available. The contents of the array are generated by
// create_string_rc.py.
//
// HANDLE_LANGUAGE stringizes its first argument and pastes an L prefix onto
// the result to form a wide string literal, yielding one { name, offset }
// entry per invocation. DO_LANGUAGES is not defined in this file; presumably
// it comes from the generated strings header and invokes HANDLE_LANGUAGE once
// per translated language.
//
// ValidateMappings() DCHECKs that the entries are strictly increasing by both
// name and offset; the binary searches in TryFindOffset() (by name) and
// LanguageSelector::GetLanguageName() (by offset) depend on that ordering.
const LangToOffset kLanguageOffsetPairs[] = {
#define HANDLE_LANGUAGE(l_, o_) { L ## #l_, o_ },
DO_LANGUAGES
#undef HANDLE_LANGUAGE
};
// A sorted array of language identifiers that are aliases to other languages
// for which translations are available. GetLanguageOffset() consults this
// table only after an exact match in kLanguageOffsetPairs has failed.
//
// Entries must remain sorted by name because lookups are binary searches;
// ValidateMappings() DCHECKs the ordering in debug builds.
const LangToOffset kLanguageToOffsetExceptions[] = {
// Alias some English variants to British English (all others wildcard to US).
{ L"en-au", IDS_L10N_OFFSET_EN_GB },
{ L"en-ca", IDS_L10N_OFFSET_EN_GB },
{ L"en-nz", IDS_L10N_OFFSET_EN_GB },
{ L"en-za", IDS_L10N_OFFSET_EN_GB },
// Alias es-es to es (all others wildcard to es-419).
{ L"es-es", IDS_L10N_OFFSET_ES },
// Google web properties use iw for he. Handle both just to be safe.
{ L"he", IDS_L10N_OFFSET_IW },
// Google web properties use no for nb. Handle both just to be safe.
{ L"nb", IDS_L10N_OFFSET_NO },
// Some Google web properties use tl for fil. Handle both just to be safe.
// They're not completely identical, but alias it here.
{ L"tl", IDS_L10N_OFFSET_FIL },
// Pre-Vista aliases for Chinese w/ script subtag.
{ L"zh-chs", IDS_L10N_OFFSET_ZH_CN },
{ L"zh-cht", IDS_L10N_OFFSET_ZH_TW },
// Vista+ aliases for Chinese w/ script subtag.
{ L"zh-hans", IDS_L10N_OFFSET_ZH_CN },
{ L"zh-hant", IDS_L10N_OFFSET_ZH_TW },
// Alias Hong Kong and Macau to Taiwan.
{ L"zh-hk", IDS_L10N_OFFSET_ZH_TW },
{ L"zh-mo", IDS_L10N_OFFSET_ZH_TW },
// Although the wildcard entry for zh would result in this, alias zh-sg so
// that it will win if it precedes another valid tag in a list of candidates.
{ L"zh-sg", IDS_L10N_OFFSET_ZH_CN }
};
// A sorted array of neutral language identifiers that are wildcard aliases to
// other languages for which translations are available. MatchLanguageOffset()
// looks up a candidate's primary language subtag (the text before the first
// '-') in this table.
//
// Entries must remain sorted by name because lookups are binary searches;
// ValidateMappings() DCHECKs the ordering in debug builds.
const LangToOffset kLanguageToOffsetWildcards[] = {
// Use the U.S. region for anything English.
{ L"en", IDS_L10N_OFFSET_EN_US },
// Use the Latin American region for anything Spanish.
{ L"es", IDS_L10N_OFFSET_ES_419 },
// Use the Brazil region for anything Portuguese.
{ L"pt", IDS_L10N_OFFSET_PT_BR },
// Use the P.R.C. region for anything Chinese.
{ L"zh", IDS_L10N_OFFSET_ZH_CN }
};
#if !defined(NDEBUG)
// Returns true if the items in the given range are sorted. If
// |byNameAndOffset| is true, the items must be sorted by both name and offset.
bool IsArraySorted(const LangToOffset* first, const LangToOffset* last,
bool byNameAndOffset) {
if (last - first > 1) {
for (--last; first != last; ++first) {
if (!(std::wstring(first->language) < (first + 1)->language) ||
(byNameAndOffset && !(first->offset < (first + 1)->offset))) {
return false;
}
}
}
return true;
}
// Validates (via DCHECK) that the static read-only mappings are properly
// sorted. The lookups in this file are binary searches, so an out-of-order
// table would silently produce wrong results. Each failure message names the
// array that is actually out of order.
void ValidateMappings() {
  // kLanguageOffsetPairs is searched by name (TryFindOffset) and by offset
  // (GetLanguageName), so it must be sorted by both.
  DCHECK(IsArraySorted(&kLanguageOffsetPairs[0],
                       &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)],
                       true)) << "kLanguageOffsetPairs is not sorted";
  // The alias tables are only ever searched by name.
  DCHECK(IsArraySorted(
      &kLanguageToOffsetExceptions[0],
      &kLanguageToOffsetExceptions[arraysize(kLanguageToOffsetExceptions)],
      false)) << "kLanguageToOffsetExceptions is not sorted";
  DCHECK(IsArraySorted(
      &kLanguageToOffsetWildcards[0],
      &kLanguageToOffsetWildcards[arraysize(kLanguageToOffsetWildcards)],
      false)) << "kLanguageToOffsetWildcards is not sorted";
}
#endif // !defined(NDEBUG)
// Orders a table entry ahead of a language name when the entry's name compares
// lexicographically less than |right|. This lets std::lower_bound search the
// sorted tables directly by name, without building a LangToOffset key.
bool operator<(const LangToOffset& left, const std::wstring& right) {
  // |left.language| < |right| is the same as |right| comparing greater.
  return right.compare(left.language) > 0;
}
// Comparator that orders two entries by offset alone; the language names are
// ignored. Used to binary-search a table that is sorted by offset.
bool IsOffsetLessThan(const LangToOffset& left, const LangToOffset& right) {
  return right.offset > left.offset;
}
// Binary search in one of the sorted arrays to find the offset corresponding to
// a given language |name|.
bool TryFindOffset(const LangToOffset* first, const LangToOffset* last,
const std::wstring& name, int* offset) {
const LangToOffset* search_result = std::lower_bound(first, last, name);
if (last != search_result && search_result->language == name) {
*offset = search_result->offset;
return true;
}
return false;
}
// A predicate function for LanguageSelector::SelectIf that searches for the
// offset of a translated language. The search first tries to find an exact
// match. Failing that, an exact match with an alias is attempted.
bool GetLanguageOffset(const std::wstring& language, int* offset) {
// Note: always perform the exact match first so that an alias is never
// selected in place of a future translation.
return
TryFindOffset(
&kLanguageOffsetPairs[0],
&kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)],
language, offset) ||
TryFindOffset(
&kLanguageToOffsetExceptions[0],
&kLanguageToOffsetExceptions[arraysize(kLanguageToOffsetExceptions)],
language, offset);
}
// A predicate function for LanguageSelector::SelectIf that searches for a
// wildcard match with |language|'s primary language subtag.
bool MatchLanguageOffset(const std::wstring& language, int* offset) {
std::wstring primary_language = language.substr(0, language.find(L'-'));
// Now check for wildcards.
return
TryFindOffset(
&kLanguageToOffsetWildcards[0],
&kLanguageToOffsetWildcards[arraysize(kLanguageToOffsetWildcards)],
primary_language, offset);
}
// Adds to |candidates| the eligible languages on the system. Any language
// setting specified by Omaha takes precedence over the operating system's
// configured languages.
void GetCandidatesFromSystem(std::vector<std::wstring>* candidates) {
DCHECK(candidates);
base::string16 language;
// Omaha gets first pick.
GoogleUpdateSettings::GetLanguage(&language);
if (!language.empty()) {
candidates->push_back(language);
}
// Now try the Windows UI languages. Use the thread preferred since that will
// kindly return us a list of all kinds of fallbacks.
base::win::i18n::GetThreadPreferredUILanguageList(candidates);
}
} // namespace
namespace installer {
// Builds a selector from the languages configured on the system (see
// GetCandidatesFromSystem) and immediately selects the best translation.
// |offset_| starts out one past the last valid offset; DoSelect() always
// replaces it.
LanguageSelector::LanguageSelector()
    : offset_(arraysize(kLanguageOffsetPairs)) {
#if !defined(NDEBUG)
  // Debug builds verify that the lookup tables are sorted before using them.
  ValidateMappings();
#endif  // !defined(NDEBUG)

  std::vector<std::wstring> system_candidates;
  GetCandidatesFromSystem(&system_candidates);
  DoSelect(system_candidates);
}
// Builds a selector from the caller-supplied, priority-ordered |candidates|
// and immediately selects the best translation among them. |offset_| starts
// out one past the last valid offset; DoSelect() always replaces it.
LanguageSelector::LanguageSelector(const std::vector<std::wstring>& candidates)
    : offset_(arraysize(kLanguageOffsetPairs)) {
#if !defined(NDEBUG)
  // Debug builds verify that the lookup tables are sorted before using them.
  ValidateMappings();
#endif  // !defined(NDEBUG)

  DoSelect(candidates);
}
// Nothing to release beyond what the members clean up themselves.
LanguageSelector::~LanguageSelector() = default;
// static
std::wstring LanguageSelector::GetLanguageName(int offset) {
DCHECK_GE(offset, 0);
DCHECK_LT(static_cast<size_t>(offset), arraysize(kLanguageOffsetPairs));
LangToOffset value = { NULL, offset };
const LangToOffset* search_result =
std::lower_bound(&kLanguageOffsetPairs[0],
&kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)],
value, IsOffsetLessThan);
if (&kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)] != search_result &&
search_result->offset == offset) {
return search_result->language;
}
NOTREACHED() << "Unknown language offset.";
return std::wstring(&kFallbackLanguage[0], arraysize(kFallbackLanguage) - 1);
}
// Offers each candidate, in order and converted to lower-case ASCII, to
// |select_predicate| until one is accepted. On acceptance, |matched_name|
// receives the candidate as originally spelled, |matched_offset| holds the
// language offset reported by the predicate, and true is returned. Returns
// false when the predicate rejects every candidate.
// static
bool LanguageSelector::SelectIf(const std::vector<std::wstring>& candidates,
                                SelectPred_Fn select_predicate,
                                std::wstring* matched_name,
                                int* matched_offset) {
  const auto winner = std::find_if(
      candidates.begin(), candidates.end(),
      [select_predicate, matched_offset](const std::wstring& candidate) {
        return select_predicate(base::ToLowerASCII(candidate), matched_offset);
      });
  if (winner == candidates.end())
    return false;

  matched_name->assign(*winner);
  return true;
}
// Chooses the best-fit translation for the priority-ordered |candidates| and
// records the outcome in |matched_candidate_| (the chosen candidate's name)
// and |offset_| (the offset of the matched translation). When nothing matches,
// the fallback language's name and offset are recorded instead.
void LanguageSelector::DoSelect(const std::vector<std::wstring>& candidates) {
  // First pass: an exact or alias match on any candidate wins outright.
  if (SelectIf(candidates, &GetLanguageOffset, &matched_candidate_, &offset_))
    return;

  // Second pass: settle for a wildcard match on a primary language subtag.
  if (SelectIf(candidates, &MatchLanguageOffset, &matched_candidate_,
               &offset_)) {
    return;
  }

  VLOG(1) << "No suitable language found for any candidates.";
  // Our fallback is "en-us".
  matched_candidate_.assign(kFallbackLanguage,
                            arraysize(kFallbackLanguage) - 1);
  offset_ = kFallbackLanguageOffset;
}
} // namespace installer