// blob: a6b1d02fa91731bf92fa2436434c07bd481f3fa5 [file] [log] [blame]
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/builtins/builtins-utils.h"
#include "src/builtins/builtins.h"
#include "src/conversions.h"
#include "src/counters.h"
#include "src/objects-inl.h"
#include "src/regexp/regexp-utils.h"
#include "src/string-case.h"
#include "src/unicode-inl.h"
#include "src/unicode.h"
namespace v8 {
namespace internal {
namespace { // for String.fromCodePoint
// Returns true when |value| is (or converts to) a Number that is unchanged by
// ToInteger and lies within [0, 0x10FFFF]. Returns false otherwise, including
// when the conversion to a Number fails.
bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) {
  if (!value->IsNumber()) {
    // Non-number inputs get one conversion attempt; on success |value| is
    // rebound to the converted Number.
    if (!Object::ToNumber(value).ToHandle(&value)) return false;
  }

  // Reject any number that ToInteger would alter.
  const double integral =
      Object::ToInteger(isolate, value).ToHandleChecked()->Number();
  if (integral != value->Number()) return false;

  const bool out_of_range = value->Number() < 0 || value->Number() > 0x10FFFF;
  return !out_of_range;
}
// Reads argument slot 1 + |index| from |args|, converts it to a Number and
// returns it as a uc32 code point. Returns -1 if the ToNumber conversion
// fails, or after throwing a RangeError (kInvalidCodePoint) when the number is
// rejected by IsValidCodePoint.
uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) {
  Handle<Object> code_point = args.at(1 + index);
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, code_point,
                                   Object::ToNumber(code_point), -1);
  if (IsValidCodePoint(isolate, code_point)) {
    return DoubleToUint32(code_point->Number());
  }
  isolate->Throw(*isolate->factory()->NewRangeError(
      MessageTemplate::kInvalidCodePoint, code_point));
  return -1;
}
} // namespace
// ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints )
//
// Builds a string from the code point arguments. Code points are first
// gathered into a one-byte buffer; as soon as one does not fit, the remainder
// is gathered into a two-byte buffer (using surrogate pairs where needed) and
// both buffers are copied into a freshly allocated two-byte string.
BUILTIN(StringFromCodePoint) {
  HandleScope scope(isolate);
  const int code_point_count = args.length() - 1;
  if (code_point_count == 0) return isolate->heap()->empty_string();
  DCHECK_LT(0, code_point_count);

  // Phase 1: optimistically collect code points that fit in a single byte.
  List<uint8_t> narrow_chars(code_point_count);
  uc32 code_point = 0;
  int position = 0;
  while (position < code_point_count) {
    code_point = NextCodePoint(isolate, args, position);
    if (code_point < 0) return isolate->heap()->exception();
    // Leave |position| on the first code point that needs two bytes.
    if (code_point > String::kMaxOneByteCharCode) break;
    narrow_chars.Add(code_point);
    position++;
  }

  // Everything fit: the result is a one-byte string.
  if (position == code_point_count) {
    RETURN_RESULT_OR_FAILURE(isolate, isolate->factory()->NewStringFromOneByte(
                                          narrow_chars.ToConstVector()));
  }

  // Phase 2: |code_point| holds the first value that did not fit; collect it
  // and every remaining argument as UTF-16 code units.
  List<uc16> wide_chars(code_point_count - position);
  const uc32 max_non_surrogate =
      static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode);
  for (;;) {
    if (code_point > max_non_surrogate) {
      wide_chars.Add(unibrow::Utf16::LeadSurrogate(code_point));
      wide_chars.Add(unibrow::Utf16::TrailSurrogate(code_point));
    } else {
      wide_chars.Add(code_point);
    }
    position++;
    if (position == code_point_count) break;
    code_point = NextCodePoint(isolate, args, position);
    if (code_point < 0) return isolate->heap()->exception();
  }

  // Concatenate the one-byte prefix and the two-byte tail.
  const int narrow_length = narrow_chars.length();
  const int wide_length = wide_chars.length();
  Handle<SeqTwoByteString> result;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result,
      isolate->factory()->NewRawTwoByteString(narrow_length + wide_length));
  CopyChars(result->GetChars(), narrow_chars.ToConstVector().start(),
            narrow_length);
  CopyChars(result->GetChars() + narrow_length,
            wide_chars.ToConstVector().start(), wide_length);
  return *result;
}
// ES6 section 21.1.3.6
// String.prototype.endsWith ( searchString [ , endPosition ] )
//
// Throws a TypeError if the search argument is a RegExp. Otherwise returns
// true when the search string matches the receiver's characters that end at
// the (clamped) end position, and false when it does not.
BUILTIN(StringPrototypeEndsWith) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.endsWith");

  // A RegExp search argument is rejected before any string conversion.
  Handle<Object> search_arg = args.atOrUndefined(isolate, 1);
  Maybe<bool> search_is_regexp = RegExpUtils::IsRegExp(isolate, search_arg);
  if (search_is_regexp.IsNothing()) {
    DCHECK(isolate->has_pending_exception());
    return isolate->heap()->exception();
  }
  if (search_is_regexp.FromJust()) {
    Handle<String> method_name = isolate->factory()->NewStringFromStaticChars(
        "String.prototype.endsWith");
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate,
        NewTypeError(MessageTemplate::kFirstArgumentNotRegExp, method_name));
  }

  Handle<String> needle;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, needle,
                                     Object::ToString(isolate, search_arg));

  // The match has to finish at |end|; an undefined position means the end of
  // the receiver.
  Handle<Object> end_position = args.atOrUndefined(isolate, 2);
  int end = receiver->length();
  if (!end_position->IsUndefined(isolate)) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, end_position, Object::ToInteger(isolate, end_position));
    end = receiver->ToValidIndex(*end_position);
  }

  // A search string longer than the available prefix can never match.
  const int needle_length = needle->length();
  const int start = end - needle_length;
  if (start < 0) return isolate->heap()->false_value();

  receiver = String::Flatten(receiver);
  needle = String::Flatten(needle);

  DisallowHeapAllocation no_gc;  // ensure vectors stay valid
  String::FlatContent receiver_content = receiver->GetFlatContent();
  String::FlatContent needle_content = needle->GetFlatContent();

  // Both strings one-byte: compare the raw bytes directly.
  if (receiver_content.IsOneByte() && needle_content.IsOneByte()) {
    const uint8_t* receiver_chars = receiver_content.ToOneByteVector().start();
    const uint8_t* needle_chars = needle_content.ToOneByteVector().start();
    const bool is_suffix =
        memcmp(receiver_chars + start, needle_chars, needle_length) == 0;
    return isolate->heap()->ToBoolean(is_suffix);
  }

  // Mixed or two-byte representations: compare character by character.
  FlatStringReader receiver_reader(isolate, receiver);
  FlatStringReader needle_reader(isolate, needle);
  for (int offset = 0; offset < needle_length; offset++) {
    if (receiver_reader.Get(start + offset) != needle_reader.Get(offset)) {
      return isolate->heap()->false_value();
    }
  }
  return isolate->heap()->true_value();
}
// ES6 section 21.1.3.7
// String.prototype.includes ( searchString [ , position ] )
//
// Throws a TypeError if the search argument is a RegExp. Otherwise returns a
// boolean telling whether String::IndexOf finds the search string in the
// receiver, starting from the (clamped) position.
BUILTIN(StringPrototypeIncludes) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.includes");

  // A RegExp search argument is rejected before any string conversion.
  Handle<Object> search_arg = args.atOrUndefined(isolate, 1);
  Maybe<bool> search_is_regexp = RegExpUtils::IsRegExp(isolate, search_arg);
  if (search_is_regexp.IsNothing()) {
    DCHECK(isolate->has_pending_exception());
    return isolate->heap()->exception();
  }
  if (search_is_regexp.FromJust()) {
    Handle<String> method_name = isolate->factory()->NewStringFromStaticChars(
        "String.prototype.includes");
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate,
        NewTypeError(MessageTemplate::kFirstArgumentNotRegExp, method_name));
  }

  Handle<String> needle;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, needle,
                                     Object::ToString(isolate, search_arg));

  // The position argument always goes through ToInteger, even if undefined.
  Handle<Object> position_arg = args.atOrUndefined(isolate, 2);
  Handle<Object> position;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, position, Object::ToInteger(isolate, position_arg));

  uint32_t from_index = receiver->ToValidIndex(*position);
  const bool found =
      String::IndexOf(isolate, receiver, needle, from_index) != -1;
  return *isolate->factory()->ToBoolean(found);
}
// ES6 section 21.1.3.9
// String.prototype.lastIndexOf ( searchString [ , position ] )
//
// Thin wrapper: forwards the receiver and the first two arguments (undefined
// when absent) to String::LastIndexOf and returns its result.
BUILTIN(StringPrototypeLastIndexOf) {
  HandleScope handle_scope(isolate);
  Handle<Object> receiver = args.receiver();
  Handle<Object> search = args.atOrUndefined(isolate, 1);
  Handle<Object> position = args.atOrUndefined(isolate, 2);
  return String::LastIndexOf(isolate, receiver, search, position);
}
// ES6 section 21.1.3.10 String.prototype.localeCompare ( that )
//
// This function is implementation specific. For now, nothing locale specific
// is done: the strings are compared code unit by code unit and the result is
// a Smi that is negative, zero or positive.
// If internationalization is enabled, then intl.js will override this function
// and provide the proper functionality, so this is just a fallback.
BUILTIN(StringPrototypeLocaleCompare) {
  HandleScope handle_scope(isolate);
  DCHECK_EQ(2, args.length());

  TO_THIS_STRING(lhs, "String.prototype.localeCompare");
  Handle<String> rhs;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, rhs,
                                     Object::ToString(isolate, args.at(1)));

  // The very same string object trivially compares equal.
  if (lhs.is_identical_to(rhs)) return Smi::kZero;

  const int lhs_length = lhs->length();
  const int rhs_length = rhs->length();

  // Settle the cases involving an empty string without flattening.
  if (lhs_length == 0) {
    if (rhs_length == 0) return Smi::kZero;
    return Smi::FromInt(-rhs_length);
  }
  if (rhs_length == 0) return Smi::FromInt(lhs_length);

  // Both strings are non-empty here, so the first character exists. If it
  // already decides the comparison there is no need to flatten.
  const int first_difference = lhs->Get(0) - rhs->Get(0);
  if (first_difference != 0) return Smi::FromInt(first_difference);

  lhs = String::Flatten(lhs);
  rhs = String::Flatten(rhs);

  DisallowHeapAllocation no_gc;
  String::FlatContent lhs_content = lhs->GetFlatContent();
  String::FlatContent rhs_content = rhs->GetFlatContent();

  // Compare over the common prefix; the first mismatch decides.
  const int common_length = lhs_length < rhs_length ? lhs_length : rhs_length;
  for (int i = 0; i < common_length; i++) {
    const int difference = lhs_content.Get(i) - rhs_content.Get(i);
    if (difference != 0) return Smi::FromInt(difference);
  }

  // One string is a prefix of the other: the length difference decides.
  return Smi::FromInt(lhs_length - rhs_length);
}
#ifndef V8_INTL_SUPPORT
// ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
//
// Fallback that performs no normalization: it only validates the form
// argument and returns the receiver string itself. An unknown form throws a
// RangeError listing the accepted forms.
// If internationalization is enabled, then intl.js will override this function
// and provide the proper functionality.
BUILTIN(StringPrototypeNormalize) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.normalize");

  // No form given: nothing to validate.
  Handle<Object> form_arg = args.atOrUndefined(isolate, 1);
  if (form_arg->IsUndefined(isolate)) return *receiver;

  Handle<String> form;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
                                     Object::ToString(isolate, form_arg));

  // Short-circuits on the first matching form name.
  Factory* const factory = isolate->factory();
  const bool is_valid_form =
      String::Equals(form, factory->NewStringFromStaticChars("NFC")) ||
      String::Equals(form, factory->NewStringFromStaticChars("NFD")) ||
      String::Equals(form, factory->NewStringFromStaticChars("NFKC")) ||
      String::Equals(form, factory->NewStringFromStaticChars("NFKD"));
  if (!is_valid_form) {
    Handle<String> valid_forms =
        factory->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate,
        NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
  }
  return *receiver;
}
#endif // !V8_INTL_SUPPORT
// String.prototype.startsWith ( searchString [ , position ] )
//
// Throws a TypeError if the search argument is a RegExp. Otherwise returns
// true when the search string matches the receiver's characters beginning at
// the (clamped) start position, and false when it does not.
BUILTIN(StringPrototypeStartsWith) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.startsWith");

  // A RegExp search argument is rejected before any string conversion.
  Handle<Object> search_arg = args.atOrUndefined(isolate, 1);
  Maybe<bool> search_is_regexp = RegExpUtils::IsRegExp(isolate, search_arg);
  if (search_is_regexp.IsNothing()) {
    DCHECK(isolate->has_pending_exception());
    return isolate->heap()->exception();
  }
  if (search_is_regexp.FromJust()) {
    Handle<String> method_name = isolate->factory()->NewStringFromStaticChars(
        "String.prototype.startsWith");
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate,
        NewTypeError(MessageTemplate::kFirstArgumentNotRegExp, method_name));
  }

  Handle<String> needle;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, needle,
                                     Object::ToString(isolate, search_arg));

  // An undefined position means matching from the very beginning.
  Handle<Object> position = args.atOrUndefined(isolate, 2);
  int start = 0;
  if (!position->IsUndefined(isolate)) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
                                       Object::ToInteger(isolate, position));
    start = receiver->ToValidIndex(*position);
  }

  // The search string must fit between |start| and the end of the receiver.
  const int needle_length = needle->length();
  if (start + needle_length > receiver->length()) {
    return isolate->heap()->false_value();
  }

  FlatStringReader receiver_reader(isolate, String::Flatten(receiver));
  FlatStringReader needle_reader(isolate, String::Flatten(needle));
  for (int offset = 0; offset < needle_length; offset++) {
    if (receiver_reader.Get(start + offset) != needle_reader.Get(offset)) {
      return isolate->heap()->false_value();
    }
  }
  return isolate->heap()->true_value();
}
// ES6 section 21.1.3.27 String.prototype.trim ()
//
// Returns the receiver string processed by String::Trim in kTrim mode.
BUILTIN(StringPrototypeTrim) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.trim");
  Handle<String> trimmed = String::Trim(receiver, String::kTrim);
  return *trimmed;
}
// Non-standard WebKit extension
//
// Returns the receiver string processed by String::Trim in kTrimLeft mode.
BUILTIN(StringPrototypeTrimLeft) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.trimLeft");
  Handle<String> trimmed = String::Trim(receiver, String::kTrimLeft);
  return *trimmed;
}
// Non-standard WebKit extension
//
// Returns the receiver string processed by String::Trim in kTrimRight mode.
BUILTIN(StringPrototypeTrimRight) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.trimRight");
  Handle<String> trimmed = String::Trim(receiver, String::kTrimRight);
  return *trimmed;
}
namespace {
// Reports whether |character| is one of the two characters that stop fitting
// into one byte when converted to uppercase: the micro sign and y with
// umlauts.
inline bool ToUpperOverflows(uc32 character) {
  switch (character) {
    case 0xb5:  // micro sign
    case 0xff:  // y with umlauts
      return true;
    default:
      return false;
  }
}
// Applies |mapping| to every character of |string|, writing the converted
// characters into |result|, which the caller has sized to |result_length|.
//
// The caller tells the outcomes apart by the kind of object returned:
//   - |result| when at least one character was changed;
//   - |string| when the conversion changed nothing;
//   - a Smi holding the exact length required, when |result_length| equals
//     the input length but a character converts to several characters or is
//     an uppercase overflow that is not being ignored. The Smi is negated
//     when a ToUpperOverflows() character was seen and overflow is not
//     ignored;
//   - the value produced by THROW_NEW_ERROR_RETURN_FAILURE when the exact
//     length would exceed String::kMaxLength.
//
// Runs under DisallowHeapAllocation, which is lifted only to allocate the
// invalid-length error.
template <class Converter>
MUST_USE_RESULT static Object* ConvertCaseHelper(
Isolate* isolate, String* string, SeqString* result, int result_length,
unibrow::Mapping<Converter, 128>* mapping) {
DisallowHeapAllocation no_gc;
// We try this twice, once with the assumption that the result is no longer
// than the input and, if that assumption breaks, again with the exact
// length. This may not be pretty, but it is nicer than what was here before
// and I hereby claim my vaffel-is.
//
// NOTE: This assumes that the upper/lower case of an ASCII
// character is also ASCII. This is currently the case, but it
// might break in the future if we implement more context and locale
// dependent upper/lower conversions.
bool has_changed_character = false;
// Convert all characters with |mapping|, assuming that they will fit
// in the buffer.
StringCharacterStream stream(string);
// Scratch buffer receiving the converted form of a single character.
unibrow::uchar chars[Converter::kMaxWidth];
// We can assume that the string is not empty (ConvertCase returns early for
// empty input).
uc32 current = stream.GetNext();
// Overflow out of one byte only matters for an uppercase conversion into a
// result that is not a two-byte string.
bool ignore_overflow = Converter::kIsToLower || result->IsSeqTwoByteString();
// |i| indexes into |result|; each branch advances it by the number of
// characters it writes.
for (int i = 0; i < result_length;) {
// Read one character ahead: the mapping takes the following character
// (0 when there is none) as context.
bool has_next = stream.HasMore();
uc32 next = has_next ? stream.GetNext() : 0;
int char_length = mapping->get(current, next, chars);
if (char_length == 0) {
// The case conversion of this character is the character itself.
result->Set(i, current);
i++;
} else if (char_length == 1 &&
(ignore_overflow || !ToUpperOverflows(current))) {
// Common case: converting the letter resulted in one character.
DCHECK(static_cast<uc32>(chars[0]) != current);
result->Set(i, chars[0]);
has_changed_character = true;
i++;
} else if (result_length == string->length()) {
// Reached on the same-length attempt when |current| converts to several
// characters, or converts to one character that overflows one byte.
bool overflows = ToUpperOverflows(current);
// We've assumed that the result would be as long as the
// input but here is a character that converts to several
// characters. No matter, we calculate the exact length
// of the result and try the whole thing again.
//
// Note that this leaves room for optimization. We could just
// memcpy what we already have to the result string. Also,
// the result string is the last object allocated we could
// "realloc" it and probably, in the vast majority of cases,
// extend the existing string to be able to hold the full
// result.
// |next| has already been consumed from the stream, so its converted
// length is accounted for separately here.
int next_length = 0;
if (has_next) {
next_length = mapping->get(next, 0, chars);
if (next_length == 0) next_length = 1;
}
// Characters written so far, plus |current| and |next| converted.
int current_length = i + char_length + next_length;
while (stream.HasMore()) {
current = stream.GetNext();
overflows |= ToUpperOverflows(current);
// NOTE: we use 0 as the next character here because, while
// the next character may affect what a character converts to,
// it does not in any case affect the length of what it converts
// to.
int char_length = mapping->get(current, 0, chars);
if (char_length == 0) char_length = 1;
current_length += char_length;
if (current_length > String::kMaxLength) {
// Throwing allocates the error object, so allocation is re-enabled
// for this scope.
AllowHeapAllocation allocate_error_and_return;
THROW_NEW_ERROR_RETURN_FAILURE(isolate,
NewInvalidStringLengthError());
}
}
// Try again with the real length. The length is returned negated if we
// need to allocate a two-byte string for the uppercase result.
return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
: Smi::FromInt(current_length);
} else {
// |result| was not sized to the input length (the exact-length attempt):
// copy every character the conversion produced.
for (int j = 0; j < char_length; j++) {
result->Set(i, chars[j]);
i++;
}
has_changed_character = true;
}
current = next;
}
if (has_changed_character) {
return result;
} else {
// If we didn't actually change anything in doing the conversion
// we simply return the original string and let the converted string
// become garbage; there is no reason to keep two identical strings
// alive.
return string;
}
}
// Converts the case of |s| using |mapping| and returns the resulting string
// object, |s| itself when nothing changed, or a failure value when an
// allocation or the conversion helper reports an exception.
//
// Strategy:
//   1. For strings that are one-byte underneath, try FastAsciiConvert into a
//      same-length one-byte string; if it processes the whole input, done.
//   2. Otherwise run ConvertCaseHelper with a same-length result that has the
//      same representation as |s|.
//   3. If the helper answers with a Smi (the exact length needed), allocate a
//      result of that length and run the helper a second time.
template <class Converter>
MUST_USE_RESULT static Object* ConvertCase(
Handle<String> s, Isolate* isolate,
unibrow::Mapping<Converter, 128>* mapping) {
s = String::Flatten(s);
int length = s->length();
// Assume that the string is not empty; we need this assumption later
if (length == 0) return *s;
// Simpler handling of ASCII strings.
//
// NOTE: This assumes that the upper/lower case of an ASCII
// character is also ASCII. This is currently the case, but it
// might break in the future if we implement more context and locale
// dependent upper/lower conversions.
if (s->IsOneByteRepresentationUnderneath()) {
// Same length as input.
Handle<SeqOneByteString> result =
isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
// No allocation until the end of this block, so the flat content read
// below stays valid.
DisallowHeapAllocation no_gc;
String::FlatContent flat_content = s->GetFlatContent();
DCHECK(flat_content.IsFlat());
bool has_changed_character = false;
int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
reinterpret_cast<char*>(result->GetChars()),
reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
length, &has_changed_character);
// If not ASCII, we discard the result and take the 2 byte path.
// Otherwise the whole input was processed; return the original string when
// nothing changed.
if (index_to_first_unprocessed == length)
return has_changed_character ? *result : *s;
}
Handle<SeqString> result; // Same length as input.
if (s->IsOneByteRepresentation()) {
result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
} else {
result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
}
// First attempt, assuming the converted string is as long as the input.
Object* answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
// An exception or a string is the final outcome; only a Smi asks for a retry.
if (answer->IsException(isolate) || answer->IsString()) return answer;
DCHECK(answer->IsSmi());
// The Smi carries the exact result length; a negative value requests a
// two-byte result.
length = Smi::cast(answer)->value();
if (s->IsOneByteRepresentation() && length > 0) {
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, isolate->factory()->NewRawOneByteString(length));
} else {
if (length < 0) length = -length;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, isolate->factory()->NewRawTwoByteString(length));
}
// Second attempt with a result of exactly the required length.
return ConvertCaseHelper(isolate, *s, *result, length, mapping);
}
} // namespace
// String.prototype.toLocaleLowerCase: converts the receiver string with the
// isolate's lower-case mapping via ConvertCase. Nothing locale specific is
// done in this function.
BUILTIN(StringPrototypeToLocaleLowerCase) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.toLocaleLowerCase");
  auto* const lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(receiver, isolate, lower_mapping);
}
// String.prototype.toLocaleUpperCase: converts the receiver string with the
// isolate's upper-case mapping via ConvertCase. Nothing locale specific is
// done in this function.
BUILTIN(StringPrototypeToLocaleUpperCase) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.toLocaleUpperCase");
  auto* const upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(receiver, isolate, upper_mapping);
}
// String.prototype.toLowerCase: converts the receiver string with the
// isolate's lower-case mapping via ConvertCase.
BUILTIN(StringPrototypeToLowerCase) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.toLowerCase");
  auto* const lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(receiver, isolate, lower_mapping);
}
// String.prototype.toUpperCase: converts the receiver string with the
// isolate's upper-case mapping via ConvertCase.
BUILTIN(StringPrototypeToUpperCase) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.toUpperCase");
  auto* const upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(receiver, isolate, upper_mapping);
}
} // namespace internal
} // namespace v8