// blob: 22083f098fc04b1a96b47f16ef47d56c42c2648a
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif // V8_INTL_SUPPORT
#include "src/objects/js-segmenter.h"
#include <map>
#include <memory>
#include <string>
#include "src/heap/factory.h"
#include "src/isolate.h"
#include "src/objects-inl.h"
#include "src/objects/intl-objects.h"
#include "src/objects/js-segmenter-inl.h"
#include "src/objects/managed.h"
#include "unicode/brkiter.h"
namespace v8 {
namespace internal {
// Translates a line break style name ("strict", "normal" or "loose") into the
// matching LineBreakStyle value. Any other string reaches UNREACHABLE().
JSSegmenter::LineBreakStyle JSSegmenter::GetLineBreakStyle(const char* str) {
  static const struct {
    const char* name;
    LineBreakStyle style;
  } kStyleTable[] = {
      {"strict", LineBreakStyle::STRICT},
      {"normal", LineBreakStyle::NORMAL},
      {"loose", LineBreakStyle::LOOSE},
  };
  for (const auto& entry : kStyleTable) {
    if (strcmp(str, entry.name) == 0) return entry.style;
  }
  UNREACHABLE();
}
// Translates a granularity name ("grapheme", "word", "sentence" or "line")
// into the matching Granularity value. Any other string reaches UNREACHABLE().
JSSegmenter::Granularity JSSegmenter::GetGranularity(const char* str) {
  static const struct {
    const char* name;
    Granularity granularity;
  } kGranularityTable[] = {
      {"grapheme", Granularity::GRAPHEME},
      {"word", Granularity::WORD},
      {"sentence", Granularity::SENTENCE},
      {"line", Granularity::LINE},
  };
  for (const auto& entry : kGranularityTable) {
    if (strcmp(str, entry.name) == 0) return entry.granularity;
  }
  UNREACHABLE();
}
// Initializes |segmenter_holder| from |locales| and |input_options|, following
// the Intl.Segmenter constructor steps quoted in the numbered comments below.
//
// On success the holder has its flags, locale, granularity, line break style
// and ICU break iterator set, and the same holder is returned. If locale
// canonicalization, the ToObject conversion of the options, or reading any of
// the options fails, an empty MaybeHandle is returned instead.
MaybeHandle<JSSegmenter> JSSegmenter::Initialize(
    Isolate* isolate, Handle<JSSegmenter> segmenter_holder,
    Handle<Object> locales, Handle<Object> input_options) {
  segmenter_holder->set_flags(0);

  // 3. Let requestedLocales be ? CanonicalizeLocaleList(locales).
  Maybe<std::vector<std::string>> maybe_requested_locales =
      Intl::CanonicalizeLocaleList(isolate, locales);
  MAYBE_RETURN(maybe_requested_locales, MaybeHandle<JSSegmenter>());
  std::vector<std::string> requested_locales =
      maybe_requested_locales.FromJust();

  // 11. If options is undefined, then
  Handle<JSReceiver> options;
  if (input_options->IsUndefined(isolate)) {
    // 11. a. Let options be ObjectCreate(null).
    options = isolate->factory()->NewJSObjectWithNullProto();
    // 12. Else
  } else {
    // 12. a. Let options be ? ToObject(options).
    ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
                               Object::ToObject(isolate, input_options),
                               JSSegmenter);
  }

  // 4. Let opt be a new Record.
  // 5. Let matcher be ? GetOption(options, "localeMatcher", "string",
  // « "lookup", "best fit" », "best fit").
  // 6. Set opt.[[localeMatcher]] to matcher.
  Maybe<Intl::MatcherOption> maybe_locale_matcher =
      Intl::GetLocaleMatcher(isolate, options, "Intl.Segmenter");
  MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSSegmenter>());
  Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();

  // 7. Let lineBreakStyle be ? GetOption(options, "lineBreakStyle", "string", «
  // "strict", "normal", "loose" », "normal").
  // 8. Set opt.[[lb]] to lineBreakStyle.
  Maybe<LineBreakStyle> maybe_line_break_style =
      Intl::GetStringOption<LineBreakStyle>(
          isolate, options, "lineBreakStyle", "Intl.Segmenter",
          {"strict", "normal", "loose"},
          {LineBreakStyle::STRICT, LineBreakStyle::NORMAL,
           LineBreakStyle::LOOSE},
          LineBreakStyle::NORMAL);
  MAYBE_RETURN(maybe_line_break_style, MaybeHandle<JSSegmenter>());
  LineBreakStyle line_break_style_enum = maybe_line_break_style.FromJust();

  // 9. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]],
  // requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]]).
  Intl::ResolvedLocale r =
      Intl::ResolveLocale(isolate, JSSegmenter::GetAvailableLocales(),
                          requested_locales, matcher, {});

  // 10. Set segmenter.[[Locale]] to the value of r.[[Locale]].
  Handle<String> locale_str =
      isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
  segmenter_holder->set_locale(*locale_str);

  // 13. Let granularity be ? GetOption(options, "granularity", "string", «
  // "grapheme", "word", "sentence", "line" », "grapheme").
  Maybe<Granularity> maybe_granularity = Intl::GetStringOption<Granularity>(
      isolate, options, "granularity", "Intl.Segmenter",
      {"grapheme", "word", "sentence", "line"},
      {Granularity::GRAPHEME, Granularity::WORD, Granularity::SENTENCE,
       Granularity::LINE},
      Granularity::GRAPHEME);
  MAYBE_RETURN(maybe_granularity, MaybeHandle<JSSegmenter>());
  Granularity granularity_enum = maybe_granularity.FromJust();

  // 14. Set segmenter.[[SegmenterGranularity]] to granularity.
  segmenter_holder->set_granularity(granularity_enum);

  // 15. If granularity is "line",
  if (granularity_enum == Granularity::LINE) {
    // a. Set segmenter.[[SegmenterLineBreakStyle]] to r.[[lb]].
    segmenter_holder->set_line_break_style(line_break_style_enum);
  } else {
    // The line break style is only recorded for "line" granularity.
    segmenter_holder->set_line_break_style(LineBreakStyle::NOTSET);
  }

  // Work on a copy of the resolved ICU locale: the "line" case below adds a
  // keyword to it.
  icu::Locale icu_locale = r.icu_locale;
  DCHECK(!icu_locale.isBogus());

  // A single error code is shared by every ICU call below so that the CHECK
  // after the switch covers whichever factory function was called.
  UErrorCode status = U_ZERO_ERROR;
  std::unique_ptr<icu::BreakIterator> icu_break_iterator;

  switch (granularity_enum) {
    case Granularity::GRAPHEME:
      icu_break_iterator.reset(
          icu::BreakIterator::createCharacterInstance(icu_locale, status));
      break;
    case Granularity::WORD:
      icu_break_iterator.reset(
          icu::BreakIterator::createWordInstance(icu_locale, status));
      break;
    case Granularity::SENTENCE:
      icu_break_iterator.reset(
          icu::BreakIterator::createSentenceInstance(icu_locale, status));
      break;
    case Granularity::LINE: {
      // Hand the line break style stored on the holder to ICU as the "lb"
      // locale keyword before creating the line break iterator.
      const char* key = uloc_toLegacyKey("lb");
      CHECK_NOT_NULL(key);
      const char* value =
          uloc_toLegacyType(key, segmenter_holder->LineBreakStyleAsCString());
      CHECK_NOT_NULL(value);
      icu_locale.setKeywordValue(key, value, status);
      CHECK(U_SUCCESS(status));
      icu_break_iterator.reset(
          icu::BreakIterator::createLineInstance(icu_locale, status));
      break;
    }
    case Granularity::COUNT:
      UNREACHABLE();
  }

  CHECK(U_SUCCESS(status));
  CHECK_NOT_NULL(icu_break_iterator.get());

  // Wrap the iterator in a Managed object and store it on the holder.
  Handle<Managed<icu::BreakIterator>> managed_break_iterator =
      Managed<icu::BreakIterator>::FromUniquePtr(isolate, 0,
                                                 std::move(icu_break_iterator));
  segmenter_holder->set_icu_break_iterator(*managed_break_iterator);

  return segmenter_holder;
}
// ecma402 #sec-Intl.Segmenter.prototype.resolvedOptions
//
// Returns a new object created from the isolate's object function, carrying
// the resolved options of |segmenter_holder|: "locale", "granularity" and,
// unless the line break style is NOTSET, "lineBreakStyle".
Handle<JSObject> JSSegmenter::ResolvedOptions(
    Isolate* isolate, Handle<JSSegmenter> segmenter_holder) {
  // 3. Let options be ! ObjectCreate(%ObjectPrototype%).
  Handle<JSObject> options =
      isolate->factory()->NewJSObject(isolate->object_function());

  // 4. For each row of Table 1, except the header row, do
  //   a. Let p be the Property value of the current row.
  //   b. Let v be the value of the segmenter's internal slot whose name is
  //      the Internal Slot value of the current row.
  //   c. If v is not undefined, then
  //     i. Perform ! CreateDataPropertyOrThrow(options, p, v).
  //
  // Table 1: Resolved Options of Segmenter Instances
  //   Internal Slot                 Property
  //   [[Locale]]                    "locale"
  //   [[SegmenterGranularity]]      "granularity"
  //   [[SegmenterLineBreakStyle]]   "lineBreakStyle"

  // [[Locale]] -> "locale"
  Handle<String> locale_value(segmenter_holder->locale(), isolate);
  JSObject::AddProperty(isolate, options, isolate->factory()->locale_string(),
                        locale_value, NONE);

  // [[SegmenterGranularity]] -> "granularity"
  Handle<String> granularity_value = segmenter_holder->GranularityAsString();
  JSObject::AddProperty(isolate, options,
                        isolate->factory()->granularity_string(),
                        granularity_value, NONE);

  // [[SegmenterLineBreakStyle]] -> "lineBreakStyle", skipped when NOTSET.
  const bool has_line_break_style =
      segmenter_holder->line_break_style() != LineBreakStyle::NOTSET;
  if (has_line_break_style) {
    Handle<String> line_break_style_value =
        segmenter_holder->LineBreakStyleAsString();
    JSObject::AddProperty(isolate, options,
                          isolate->factory()->lineBreakStyle_string(),
                          line_break_style_value, NONE);
  }

  // 5. Return options.
  return options;
}
// Returns the line break style of this segmenter as a C string: "strict",
// "normal" or "loose". COUNT and NOTSET have no string form and reach
// UNREACHABLE().
const char* JSSegmenter::LineBreakStyleAsCString() const {
  const char* name = nullptr;
  switch (line_break_style()) {
    case LineBreakStyle::STRICT:
      name = "strict";
      break;
    case LineBreakStyle::NORMAL:
      name = "normal";
      break;
    case LineBreakStyle::LOOSE:
      name = "loose";
      break;
    case LineBreakStyle::COUNT:
    case LineBreakStyle::NOTSET:
      UNREACHABLE();
  }
  return name;
}
// Returns the line break style of this segmenter as the corresponding
// read-only root string handle (strict / normal / loose). COUNT and NOTSET
// have no string form and reach UNREACHABLE().
Handle<String> JSSegmenter::LineBreakStyleAsString() const {
  Handle<String> name;
  switch (line_break_style()) {
    case LineBreakStyle::STRICT:
      name = GetReadOnlyRoots().strict_string_handle();
      break;
    case LineBreakStyle::NORMAL:
      name = GetReadOnlyRoots().normal_string_handle();
      break;
    case LineBreakStyle::LOOSE:
      name = GetReadOnlyRoots().loose_string_handle();
      break;
    case LineBreakStyle::COUNT:
    case LineBreakStyle::NOTSET:
      UNREACHABLE();
  }
  return name;
}
// Returns the granularity of this segmenter as the corresponding read-only
// root string handle (grapheme / word / sentence / line). COUNT has no string
// form and reaches UNREACHABLE().
Handle<String> JSSegmenter::GranularityAsString() const {
  Handle<String> name;
  switch (granularity()) {
    case Granularity::GRAPHEME:
      name = GetReadOnlyRoots().grapheme_string_handle();
      break;
    case Granularity::WORD:
      name = GetReadOnlyRoots().word_string_handle();
      break;
    case Granularity::SENTENCE:
      name = GetReadOnlyRoots().sentence_string_handle();
      break;
    case Granularity::LINE:
      name = GetReadOnlyRoots().line_string_handle();
      break;
    case Granularity::COUNT:
      UNREACHABLE();
  }
  return name;
}
std::set<std::string> JSSegmenter::GetAvailableLocales() {
int32_t num_locales = 0;
const icu::Locale* icu_available_locales =
icu::BreakIterator::getAvailableLocales(num_locales);
return Intl::BuildLocaleSet(icu_available_locales, num_locales);
}
} // namespace internal
} // namespace v8