// components/translate/core/language_detection/language_detection_util.cc - chromium/src - Git at Google

 // Copyright 2014 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "components/translate/core/language_detection/language_detection_util.h"

 #include <stddef.h>

 #include "base/logging.h"
 #include "base/metrics/histogram_base.h"
 #include "base/metrics/histogram_functions.h"
 #include "base/metrics/histogram_macros.h"
 #include "base/metrics/metrics_hashes.h"
 #include "base/stl_util.h"
 #include "base/strings/string_split.h"
 #include "base/strings/string_util.h"
 #include "base/strings/utf_string_conversions.h"
 #include "components/translate/core/common/translate_constants.h"
 #include "components/translate/core/common/translate_metrics.h"
 #include "components/translate/core/common/translate_util.h"
 #include "components/translate/core/language_detection/chinese_script_classifier.h"
 #include "third_party/cld_3/src/src/nnet_language_identifier.h"

 namespace {

 // Table of languages that are very similar and difficult for CLD to
 // distinguish. Entries sharing the same non-zero |group| are considered
 // interchangeable by GetSimilarLanguageGroupCode() callers.
 struct SimilarLanguageCode {
   const char* const code;
   int group;
 };

 const SimilarLanguageCode kSimilarLanguageCodes[] = {
   {"bs", 1},
   {"hr", 1},
   {"hi", 2},
   {"ne", 2},
 };

 // Looks |language| up in |kSimilarLanguageCodes| and returns its group code,
 // or 0 when the language belongs to no group.
 //
 // Only the primary subtag is compared, so "hr" and "hr-HR" both map to group
 // 1, while an unrelated code that merely starts with the same letters (for
 // example "hil" or "new") maps to 0 instead of being mistaken for "hi"/"ne".
 int GetSimilarLanguageGroupCode(const std::string& language) {
   for (const SimilarLanguageCode& entry : kSimilarLanguageCodes) {
     const size_t code_length = std::char_traits<char>::length(entry.code);
     if (language.compare(0, code_length, entry.code) != 0)
       continue;
     // |entry.code| is a prefix of |language|; accept it only when it covers
     // the whole primary subtag (end of string or a '-' separator follows).
     if (language.size() == code_length || language[code_length] == '-')
       return entry.group;
   }
   return 0;
 }

 // Codes of well-known languages whose pages are often served with a wrongly
 // configured "Content-Language: en" header. Consulted by
 // IsServerWrongConfigurationLanguage().
 const char* const kWellKnownCodesOnWrongConfiguration[] = {
     "es", "pt",    "ja",    "ru",
     "de", "zh-CN", "zh-TW", "ar",
     "id", "fr",    "it",    "th",
 };

 // Normalizes |code| in place. Well-known formatting typos are corrected
 // first; if the result is still not a valid language code, |code| is cleared.
 // Otherwise it is passed through translate::ToTranslateLanguageSynonym().
 void ApplyLanguageCodeCorrection(std::string* code) {
   translate::CorrectLanguageCodeTypo(code);

   if (translate::IsValidLanguageCode(*code)) {
     translate::ToTranslateLanguageSynonym(code);
   } else {
     code->clear();
   }
 }

 // Runs the CLD3 language identifier over |text| and returns the detected
 // language code, or translate::kUnknownLanguageCode when there is no usable
 // prediction. When |is_cld_reliable| is non-null it receives the identifier's
 // reliability flag for the top prediction, regardless of what is returned.
 std::string DetermineTextLanguage(const base::string16& text,
                                   bool* is_cld_reliable) {
   const std::string utf8_text(base::UTF16ToUTF8(text));

   // Ask the identifier for the single most frequent language.
   chrome_lang_id::NNetLanguageIdentifier identifier;
   const chrome_lang_id::NNetLanguageIdentifier::Result top_result =
       identifier.FindTopNMostFreqLangs(utf8_text, /*num_langs=*/1).at(0);
   const std::string& predicted_language = top_result.language;
   const bool is_known =
       predicted_language != chrome_lang_id::NNetLanguageIdentifier::kUnknown;

   // Record the raw prediction before any filtering is applied.
   const base::HistogramBase::Sample predicted_language_hash =
       static_cast<base::HistogramBase::Sample>(
           base::HashMetricName(predicted_language));
   base::UmaHistogramSparse("Translate.CLD3.LanguageDetected",
                            predicted_language_hash);
   if (is_known) {
     UMA_HISTOGRAM_PERCENTAGE("Translate.CLD3.LanguagePercentage",
                              static_cast<int>(100 * top_result.proportion));
   }

   if (is_cld_reliable != nullptr)
     *is_cld_reliable = top_result.is_reliable;

   // xx-Latn predictions are currently not supported.
   const char* const unsupported_predictions[] = {
       "bg-Latn", "el-Latn", "ja-Latn", "ru-Latn", "zh-Latn"};
   bool is_supported = true;
   for (const char* unsupported : unsupported_predictions) {
     if (predicted_language == unsupported) {
       is_supported = false;
       break;
     }
   }

   // Unreliable, "unknown" and unsupported predictions are all reported as
   // the unknown language.
   std::string language = translate::kUnknownLanguageCode;
   if (top_result.is_reliable && is_known && is_supported) {
     if (predicted_language == "zh") {
       // A "zh" (Chinese) prediction has to be narrowed down to zh-Hant
       // (Chinese Traditional) or zh-Hans (Chinese Simplified), and then
       // converted to the old-style codes used by the Translate API.
       translate::ChineseScriptClassifier zh_classifier;
       const std::string zh_script = zh_classifier.Classify(utf8_text);
       if (zh_script == "zh-Hant") {
         language = "zh-TW";
       } else if (zh_script == "zh-Hans") {
         language = "zh-CN";
       }
       // Any other classification leaves |language| as unknown.
     } else {
       language = predicted_language;
     }
   }

   VLOG(1) << "Detected language: " << language;
   return language;
 }

 // Returns true when CLD can supply a sub code that the page language lacks.
 //
 // The Translate server cannot treat general Chinese. If the page language is
 // plain "zh" and CLD reports a "zh-" code (compared ASCII
 // case-insensitively), the CLD language has priority.
 // TODO(hajimehoshi): How about the other dialects like zh-MO?
 bool CanCLDComplementSubCode(
     const std::string& page_language, const std::string& cld_language) {
   if (page_language != "zh")
     return false;
   return base::StartsWith(cld_language, "zh-",
                           base::CompareCase::INSENSITIVE_ASCII);
 }

 }  // namespace

 namespace translate {

 // Decides the language of a page from the Content-Language value |code|, the
 // html lang attribute |html_lang| and the page text |contents|.
 //
 // The text is run through CLD; the raw CLD language and its reliability flag
 // are written to |cld_language_p| and |is_cld_reliable_p| when those are
 // non-null. The declared language is the corrected html lang if it is valid,
 // otherwise the corrected Content-Language. The return value is, in order:
 //   - the CLD language if no valid language was declared,
 //   - the declared language if CLD could not tell,
 //   - the CLD language if it merely adds a sub code to a bare "zh",
 //   - the declared language if CLD agrees (same or similar language),
 //   - the CLD language if the declared English looks like a server default,
 //   - kUnknownLanguageCode otherwise.
 // Each outcome is reported through ReportLanguageVerification().
 std::string DeterminePageLanguage(const std::string& code,
                                   const std::string& html_lang,
                                   const base::string16& contents,
                                   std::string* cld_language_p,
                                   bool* is_cld_reliable_p) {
   // Normalize and validate the html lang attribute.
   std::string modified_html_lang;
   if (!html_lang.empty()) {
     modified_html_lang = html_lang;
     ApplyLanguageCodeCorrection(&modified_html_lang);
     translate::ReportHtmlLang(html_lang, modified_html_lang);
     VLOG(9) << "html lang based language code: " << modified_html_lang;
   }

   // Normalize and validate Content-Language.
   std::string modified_code;
   if (!code.empty()) {
     modified_code = code;
     ApplyLanguageCodeCorrection(&modified_code);
     translate::ReportContentLanguage(code, modified_code);
   }

   bool is_cld_reliable = false;
   std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable);

   // Callers receive the CLD result as detected, before synonym conversion.
   if (cld_language_p != nullptr)
     *cld_language_p = cld_language;
   if (is_cld_reliable_p != nullptr)
     *is_cld_reliable_p = is_cld_reliable;
   translate::ToTranslateLanguageSynonym(&cld_language);

   // A valid html lang wins over Content-Language.
   const std::string& language =
       modified_html_lang.empty() ? modified_code : modified_html_lang;

   const bool cld_is_unknown = cld_language == kUnknownLanguageCode;

   // When the page language is English, log conflicting CLD results. These
   // metrics are used to decide when to favor CLD.
   const bool page_is_english = language.compare(0, 2, "en") == 0;
   const bool cld_is_english = cld_language.compare(0, 2, "en") == 0;
   if (page_is_english && !cld_is_english && !cld_is_unknown)
     translate::ReportLanguageDetectionConflict(language, cld_language);

   // Nothing usable was declared: fall back to CLD, even though its result
   // might be translate::kUnknownLanguageCode.
   if (language.empty()) {
     translate::ReportLanguageVerification(
         translate::LANGUAGE_VERIFICATION_CLD_ONLY);
     return cld_language;
   }

   // CLD has no opinion: keep the declared language.
   if (cld_is_unknown) {
     translate::ReportLanguageVerification(
         translate::LANGUAGE_VERIFICATION_UNKNOWN);
     return language;
   }

   if (CanCLDComplementSubCode(language, cld_language)) {
     translate::ReportLanguageVerification(
         translate::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE);
     return cld_language;
   }

   if (IsSameOrSimilarLanguages(language, cld_language)) {
     translate::ReportLanguageVerification(
         translate::LANGUAGE_VERIFICATION_CLD_AGREE);
     return language;
   }

   if (MaybeServerWrongConfiguration(language, cld_language)) {
     translate::ReportLanguageVerification(
         translate::LANGUAGE_VERIFICATION_TRUST_CLD);
     return cld_language;
   }

   // The declared value might be wrong because CLD says that this page is
   // written in another language. In this case Chrome doesn't rely on any of
   // the language codes, and gives up suggesting a translation.
   translate::ReportLanguageVerification(
       translate::LANGUAGE_VERIFICATION_CLD_DISAGREE);
   return kUnknownLanguageCode;
 }

 // Repairs common formatting mistakes in |code| in place:
 //   - keeps only the first entry of a comma-separated list,
 //   - trims ASCII whitespace from both ends,
 //   - turns the first underscore into a dash,
 //   - lower-cases everything before the first dash and upper-cases the rest
 //     (or lower-cases the whole code when there is no dash).
 // |code| must not be null.
 void CorrectLanguageCodeTypo(std::string* code) {
   DCHECK(code);

   // More than one language may be specified; just keep the first one.
   const size_t comma_pos = code->find(',');
   if (comma_pos != std::string::npos)
     code->erase(comma_pos);
   base::TrimWhitespaceASCII(*code, base::TRIM_ALL, code);

   // An underscore instead of a dash is a frequent mistake.
   const size_t underscore_pos = code->find('_');
   if (underscore_pos != std::string::npos)
     code->at(underscore_pos) = '-';

   const size_t dash_pos = code->find('-');
   if (dash_pos == std::string::npos) {
     *code = base::ToLowerASCII(*code);
     return;
   }

   // The dash itself stays with the upper-cased tail.
   const std::string main_part = base::ToLowerASCII(code->substr(0, dash_pos));
   const std::string sub_part = base::ToUpperASCII(code->substr(dash_pos));
   *code = main_part + sub_part;
 }

 bool IsValidLanguageCode(const std::string& code) {
   // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/.
   // TODO(hajimehoshi): How about es-419, which is used as an Accept language?
   std::vector<base::StringPiece> chunks = base::SplitStringPiece(
       code, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);

   if (chunks.size() < 1 || 2 < chunks.size())
     return false;

   const base::StringPiece& main_code = chunks[0];

   if (main_code.size() < 1 || 3 < main_code.size())
     return false;

   for (char c : main_code) {
     if (!base::IsAsciiAlpha(c))
       return false;
   }

   if (chunks.size() == 1)
     return true;

   const base::StringPiece& sub_code = chunks[1];

   if (sub_code.size() != 2)
     return false;

   for (char c : sub_code) {
     if (!base::IsAsciiAlpha(c))
       return false;
   }

   return true;
 }

 // Returns true if |page_language| and |cld_language| have the same main
 // language part (the sub code after '-' is ignored), or if both belong to
 // the same group of similar languages. The outcome of the similar-language
 // check is reported through ReportSimilarLanguageMatch(); an exact match is
 // reported as false there, even though this function returns true.
 bool IsSameOrSimilarLanguages(const std::string& page_language,
                               const std::string& cld_language) {
   const std::vector<std::string> page_parts = base::SplitString(
       page_language, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
   if (page_parts.empty())
     return false;

   const std::vector<std::string> cld_parts = base::SplitString(
       cld_language, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
   if (cld_parts.empty())
     return false;

   // Strict match on the language part; the country code is ignored here.
   if (page_parts.front() == cld_parts.front()) {
     translate::ReportSimilarLanguageMatch(false);
     return true;
   }

   // Otherwise both languages must be in the similar language list and belong
   // to the same group.
   const int page_group = GetSimilarLanguageGroupCode(page_language);
   const bool is_similar =
       page_group != 0 &&
       page_group == GetSimilarLanguageGroupCode(cld_language);

   translate::ReportSimilarLanguageMatch(is_similar);
   return is_similar;
 }

 // Returns true if |language_code| exactly matches one of the entries in
 // |kWellKnownCodesOnWrongConfiguration|.
 bool IsServerWrongConfigurationLanguage(const std::string& language_code) {
   for (const char* const candidate : kWellKnownCodesOnWrongConfiguration) {
     if (language_code == candidate)
       return true;
   }
   return false;
 }

 // Returns true if the declared |page_language| is likely a server default
 // rather than the real page language, so that |cld_language| should be
 // trusted instead.
 //
 // A server may provide "en-*" language meta information just because the
 // user never configured it. That is assumed only when |page_language| starts
 // with "en" (ASCII case-insensitive) and |cld_language| is one of the
 // well-known languages which often provide "en-*" meta information
 // mistakenly. Any other |page_language| is respected.
 bool MaybeServerWrongConfiguration(const std::string& page_language,
                                    const std::string& cld_language) {
   const bool page_claims_english = base::StartsWith(
       page_language, "en", base::CompareCase::INSENSITIVE_ASCII);
   return page_claims_english &&
          IsServerWrongConfigurationLanguage(cld_language);
 }

 }  // namespace translate
	// Copyright 2014 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "components/translate/core/language_detection/language_detection_util.h"

	#include <stddef.h>

	#include "base/logging.h"
	#include "base/metrics/histogram_base.h"
	#include "base/metrics/histogram_functions.h"
	#include "base/metrics/histogram_macros.h"
	#include "base/metrics/metrics_hashes.h"
	#include "base/stl_util.h"
	#include "base/strings/string_split.h"
	#include "base/strings/string_util.h"
	#include "base/strings/utf_string_conversions.h"
	#include "components/translate/core/common/translate_constants.h"
	#include "components/translate/core/common/translate_metrics.h"
	#include "components/translate/core/common/translate_util.h"
	#include "components/translate/core/language_detection/chinese_script_classifier.h"
	#include "third_party/cld_3/src/src/nnet_language_identifier.h"

	namespace {

	// Table of languages that are very similar and difficult for CLD to
	// distinguish. Entries sharing the same non-zero |group| are considered
	// interchangeable by GetSimilarLanguageGroupCode() callers.
	struct SimilarLanguageCode {
	  const char* const code;
	  int group;
	};

	const SimilarLanguageCode kSimilarLanguageCodes[] = {
	  {"bs", 1},
	  {"hr", 1},
	  {"hi", 2},
	  {"ne", 2},
	};

	// Looks |language| up in |kSimilarLanguageCodes| and returns its group code,
	// or 0 when the language belongs to no group.
	//
	// Only the primary subtag is compared, so "hr" and "hr-HR" both map to group
	// 1, while an unrelated code that merely starts with the same letters (for
	// example "hil" or "new") maps to 0 instead of being mistaken for "hi"/"ne".
	int GetSimilarLanguageGroupCode(const std::string& language) {
	  for (const SimilarLanguageCode& entry : kSimilarLanguageCodes) {
	    const size_t code_length = std::char_traits<char>::length(entry.code);
	    if (language.compare(0, code_length, entry.code) != 0)
	      continue;
	    // |entry.code| is a prefix of |language|; accept it only when it covers
	    // the whole primary subtag (end of string or a '-' separator follows).
	    if (language.size() == code_length || language[code_length] == '-')
	      return entry.group;
	  }
	  return 0;
	}

	// Codes of well-known languages whose pages are often served with a wrongly
	// configured "Content-Language: en" header. Consulted by
	// IsServerWrongConfigurationLanguage().
	const char* const kWellKnownCodesOnWrongConfiguration[] = {
	    "es", "pt",    "ja",    "ru",
	    "de", "zh-CN", "zh-TW", "ar",
	    "id", "fr",    "it",    "th",
	};

	// Normalizes |code| in place. Well-known formatting typos are corrected
	// first; if the result is still not a valid language code, |code| is cleared.
	// Otherwise it is passed through translate::ToTranslateLanguageSynonym().
	void ApplyLanguageCodeCorrection(std::string* code) {
	  translate::CorrectLanguageCodeTypo(code);

	  if (translate::IsValidLanguageCode(*code)) {
	    translate::ToTranslateLanguageSynonym(code);
	  } else {
	    code->clear();
	  }
	}

	// Runs the CLD3 language identifier over |text| and returns the detected
	// language code, or translate::kUnknownLanguageCode when there is no usable
	// prediction. When |is_cld_reliable| is non-null it receives the identifier's
	// reliability flag for the top prediction, regardless of what is returned.
	std::string DetermineTextLanguage(const base::string16& text,
	                                  bool* is_cld_reliable) {
	  const std::string utf8_text(base::UTF16ToUTF8(text));

	  // Ask the identifier for the single most frequent language.
	  chrome_lang_id::NNetLanguageIdentifier identifier;
	  const chrome_lang_id::NNetLanguageIdentifier::Result top_result =
	      identifier.FindTopNMostFreqLangs(utf8_text, /*num_langs=*/1).at(0);
	  const std::string& predicted_language = top_result.language;
	  const bool is_known =
	      predicted_language != chrome_lang_id::NNetLanguageIdentifier::kUnknown;

	  // Record the raw prediction before any filtering is applied.
	  const base::HistogramBase::Sample predicted_language_hash =
	      static_cast<base::HistogramBase::Sample>(
	          base::HashMetricName(predicted_language));
	  base::UmaHistogramSparse("Translate.CLD3.LanguageDetected",
	                           predicted_language_hash);
	  if (is_known) {
	    UMA_HISTOGRAM_PERCENTAGE("Translate.CLD3.LanguagePercentage",
	                             static_cast<int>(100 * top_result.proportion));
	  }

	  if (is_cld_reliable != nullptr)
	    *is_cld_reliable = top_result.is_reliable;

	  // xx-Latn predictions are currently not supported.
	  const char* const unsupported_predictions[] = {
	      "bg-Latn", "el-Latn", "ja-Latn", "ru-Latn", "zh-Latn"};
	  bool is_supported = true;
	  for (const char* unsupported : unsupported_predictions) {
	    if (predicted_language == unsupported) {
	      is_supported = false;
	      break;
	    }
	  }

	  // Unreliable, "unknown" and unsupported predictions are all reported as
	  // the unknown language.
	  std::string language = translate::kUnknownLanguageCode;
	  if (top_result.is_reliable && is_known && is_supported) {
	    if (predicted_language == "zh") {
	      // A "zh" (Chinese) prediction has to be narrowed down to zh-Hant
	      // (Chinese Traditional) or zh-Hans (Chinese Simplified), and then
	      // converted to the old-style codes used by the Translate API.
	      translate::ChineseScriptClassifier zh_classifier;
	      const std::string zh_script = zh_classifier.Classify(utf8_text);
	      if (zh_script == "zh-Hant") {
	        language = "zh-TW";
	      } else if (zh_script == "zh-Hans") {
	        language = "zh-CN";
	      }
	      // Any other classification leaves |language| as unknown.
	    } else {
	      language = predicted_language;
	    }
	  }

	  VLOG(1) << "Detected language: " << language;
	  return language;
	}

	// Returns true when CLD can supply a sub code that the page language lacks.
	//
	// The Translate server cannot treat general Chinese. If the page language is
	// plain "zh" and CLD reports a "zh-" code (compared ASCII
	// case-insensitively), the CLD language has priority.
	// TODO(hajimehoshi): How about the other dialects like zh-MO?
	bool CanCLDComplementSubCode(
	    const std::string& page_language, const std::string& cld_language) {
	  if (page_language != "zh")
	    return false;
	  return base::StartsWith(cld_language, "zh-",
	                          base::CompareCase::INSENSITIVE_ASCII);
	}

	} // namespace

	namespace translate {

	// Decides the language of a page from the Content-Language value |code|, the
	// html lang attribute |html_lang| and the page text |contents|.
	//
	// The text is run through CLD; the raw CLD language and its reliability flag
	// are written to |cld_language_p| and |is_cld_reliable_p| when those are
	// non-null. The declared language is the corrected html lang if it is valid,
	// otherwise the corrected Content-Language. The return value is, in order:
	//   - the CLD language if no valid language was declared,
	//   - the declared language if CLD could not tell,
	//   - the CLD language if it merely adds a sub code to a bare "zh",
	//   - the declared language if CLD agrees (same or similar language),
	//   - the CLD language if the declared English looks like a server default,
	//   - kUnknownLanguageCode otherwise.
	// Each outcome is reported through ReportLanguageVerification().
	std::string DeterminePageLanguage(const std::string& code,
	                                  const std::string& html_lang,
	                                  const base::string16& contents,
	                                  std::string* cld_language_p,
	                                  bool* is_cld_reliable_p) {
	  // Normalize and validate the html lang attribute.
	  std::string modified_html_lang;
	  if (!html_lang.empty()) {
	    modified_html_lang = html_lang;
	    ApplyLanguageCodeCorrection(&modified_html_lang);
	    translate::ReportHtmlLang(html_lang, modified_html_lang);
	    VLOG(9) << "html lang based language code: " << modified_html_lang;
	  }

	  // Normalize and validate Content-Language.
	  std::string modified_code;
	  if (!code.empty()) {
	    modified_code = code;
	    ApplyLanguageCodeCorrection(&modified_code);
	    translate::ReportContentLanguage(code, modified_code);
	  }

	  bool is_cld_reliable = false;
	  std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable);

	  // Callers receive the CLD result as detected, before synonym conversion.
	  if (cld_language_p != nullptr)
	    *cld_language_p = cld_language;
	  if (is_cld_reliable_p != nullptr)
	    *is_cld_reliable_p = is_cld_reliable;
	  translate::ToTranslateLanguageSynonym(&cld_language);

	  // A valid html lang wins over Content-Language.
	  const std::string& language =
	      modified_html_lang.empty() ? modified_code : modified_html_lang;

	  const bool cld_is_unknown = cld_language == kUnknownLanguageCode;

	  // When the page language is English, log conflicting CLD results. These
	  // metrics are used to decide when to favor CLD.
	  const bool page_is_english = language.compare(0, 2, "en") == 0;
	  const bool cld_is_english = cld_language.compare(0, 2, "en") == 0;
	  if (page_is_english && !cld_is_english && !cld_is_unknown)
	    translate::ReportLanguageDetectionConflict(language, cld_language);

	  // Nothing usable was declared: fall back to CLD, even though its result
	  // might be translate::kUnknownLanguageCode.
	  if (language.empty()) {
	    translate::ReportLanguageVerification(
	        translate::LANGUAGE_VERIFICATION_CLD_ONLY);
	    return cld_language;
	  }

	  // CLD has no opinion: keep the declared language.
	  if (cld_is_unknown) {
	    translate::ReportLanguageVerification(
	        translate::LANGUAGE_VERIFICATION_UNKNOWN);
	    return language;
	  }

	  if (CanCLDComplementSubCode(language, cld_language)) {
	    translate::ReportLanguageVerification(
	        translate::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE);
	    return cld_language;
	  }

	  if (IsSameOrSimilarLanguages(language, cld_language)) {
	    translate::ReportLanguageVerification(
	        translate::LANGUAGE_VERIFICATION_CLD_AGREE);
	    return language;
	  }

	  if (MaybeServerWrongConfiguration(language, cld_language)) {
	    translate::ReportLanguageVerification(
	        translate::LANGUAGE_VERIFICATION_TRUST_CLD);
	    return cld_language;
	  }

	  // The declared value might be wrong because CLD says that this page is
	  // written in another language. In this case Chrome doesn't rely on any of
	  // the language codes, and gives up suggesting a translation.
	  translate::ReportLanguageVerification(
	      translate::LANGUAGE_VERIFICATION_CLD_DISAGREE);
	  return kUnknownLanguageCode;
	}

	// Repairs common formatting mistakes in |code| in place:
	//   - keeps only the first entry of a comma-separated list,
	//   - trims ASCII whitespace from both ends,
	//   - turns the first underscore into a dash,
	//   - lower-cases everything before the first dash and upper-cases the rest
	//     (or lower-cases the whole code when there is no dash).
	// |code| must not be null.
	void CorrectLanguageCodeTypo(std::string* code) {
	  DCHECK(code);

	  // More than one language may be specified; just keep the first one.
	  const size_t comma_pos = code->find(',');
	  if (comma_pos != std::string::npos)
	    code->erase(comma_pos);
	  base::TrimWhitespaceASCII(*code, base::TRIM_ALL, code);

	  // An underscore instead of a dash is a frequent mistake.
	  const size_t underscore_pos = code->find('_');
	  if (underscore_pos != std::string::npos)
	    code->at(underscore_pos) = '-';

	  const size_t dash_pos = code->find('-');
	  if (dash_pos == std::string::npos) {
	    // Write through the pointer: the string is lower-cased, not the
	    // pointer reassigned.
	    *code = base::ToLowerASCII(*code);
	    return;
	  }

	  // The dash itself stays with the upper-cased tail.
	  const std::string main_part = base::ToLowerASCII(code->substr(0, dash_pos));
	  const std::string sub_part = base::ToUpperASCII(code->substr(dash_pos));
	  *code = main_part + sub_part;
	}

	bool IsValidLanguageCode(const std::string& code) {
	// Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/.
	// TODO(hajimehoshi): How about es-419, which is used as an Accept language?
	std::vector<base::StringPiece> chunks = base::SplitStringPiece(
	code, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);

	if (chunks.size() < 1 \|\| 2 < chunks.size())
	return false;

	const base::StringPiece& main_code = chunks[0];

	if (main_code.size() < 1 \|\| 3 < main_code.size())
	return false;

	for (char c : main_code) {
	if (!base::IsAsciiAlpha(c))
	return false;
	}

	if (chunks.size() == 1)
	return true;

	const base::StringPiece& sub_code = chunks[1];

	if (sub_code.size() != 2)
	return false;

	for (char c : sub_code) {
	if (!base::IsAsciiAlpha(c))
	return false;
	}

	return true;
	}

	// Returns true if |page_language| and |cld_language| have the same main
	// language part (the sub code after '-' is ignored), or if both belong to
	// the same group of similar languages. The outcome of the similar-language
	// check is reported through ReportSimilarLanguageMatch(); an exact match is
	// reported as false there, even though this function returns true.
	bool IsSameOrSimilarLanguages(const std::string& page_language,
	                              const std::string& cld_language) {
	  const std::vector<std::string> page_parts = base::SplitString(
	      page_language, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
	  if (page_parts.empty())
	    return false;

	  const std::vector<std::string> cld_parts = base::SplitString(
	      cld_language, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
	  if (cld_parts.empty())
	    return false;

	  // Strict match on the language part; the country code is ignored here.
	  if (page_parts.front() == cld_parts.front()) {
	    translate::ReportSimilarLanguageMatch(false);
	    return true;
	  }

	  // Otherwise both languages must be in the similar language list and belong
	  // to the same group.
	  const int page_group = GetSimilarLanguageGroupCode(page_language);
	  const bool is_similar =
	      page_group != 0 &&
	      page_group == GetSimilarLanguageGroupCode(cld_language);

	  translate::ReportSimilarLanguageMatch(is_similar);
	  return is_similar;
	}

	// Returns true if |language_code| exactly matches one of the entries in
	// |kWellKnownCodesOnWrongConfiguration|.
	bool IsServerWrongConfigurationLanguage(const std::string& language_code) {
	  for (const char* const candidate : kWellKnownCodesOnWrongConfiguration) {
	    if (language_code == candidate)
	      return true;
	  }
	  return false;
	}

	// Returns true if the declared |page_language| is likely a server default
	// rather than the real page language, so that |cld_language| should be
	// trusted instead.
	//
	// A server may provide "en-*" language meta information just because the
	// user never configured it. That is assumed only when |page_language| starts
	// with "en" (ASCII case-insensitive) and |cld_language| is one of the
	// well-known languages which often provide "en-*" meta information
	// mistakenly. Any other |page_language| is respected.
	bool MaybeServerWrongConfiguration(const std::string& page_language,
	                                   const std::string& cld_language) {
	  const bool page_claims_english = base::StartsWith(
	      page_language, "en", base::CompareCase::INSENSITIVE_ASCII);
	  return page_claims_english &&
	         IsServerWrongConfigurationLanguage(cld_language);
	}

	} // namespace translate