// blob: 4d16b13f51fe996f04b97d386e26500fb39ba16b [file] [log] [blame]
/*
* Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All
* rights reserved.
* Copyright (C) 2005 Alexey Proskuryakov.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "third_party/blink/renderer/core/editing/iterators/text_searcher_icu.h"
#include <unicode/usearch.h>
#include "base/macros.h"
#include "third_party/blink/renderer/platform/text/text_break_iterator_internal_icu.h"
#include "third_party/blink/renderer/platform/text/unicode_utilities.h"
#include "third_party/blink/renderer/platform/wtf/text/character_names.h"
#include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
namespace blink {
namespace {
// Opens the ICU string searcher used by this file. The collator is requested
// for the current search locale with the "search" collation keyword appended.
UStringSearch* CreateSearcher() {
  const String collator_locale =
      CurrentSearchLocaleID() + String("@collation=search");

  // usearch_open needs a non-empty pattern and a non-empty text to succeed.
  // A single newline stands in for both; the actual value is irrelevant
  // because no search runs before the pattern and the text are both replaced.
  UErrorCode open_status = U_ZERO_ERROR;
  UStringSearch* const icu_searcher =
      usearch_open(&kNewlineCharacter, 1, &kNewlineCharacter, 1,
                   collator_locale.Utf8().data(), nullptr, &open_status);

  // The fallback/default warnings are tolerated alongside plain success.
  DCHECK(open_status == U_ZERO_ERROR ||
         open_status == U_USING_FALLBACK_WARNING ||
         open_status == U_USING_DEFAULT_WARNING)
      << open_status;
  return icu_searcher;
}
// Holder for the single UStringSearch instance shared by this file.
// AcquireSearcher() and ReleaseSearcher() are expected to alternate. The
// "lock" is only a flag that is asserted on and maintained when DCHECKs are
// enabled; otherwise lock() and unlock() do nothing.
class ICULockableSearcher {
 public:
  ICULockableSearcher(const ICULockableSearcher&) = delete;
  ICULockableSearcher& operator=(const ICULockableSearcher&) = delete;

  // Flags the shared searcher as in use and returns it.
  static UStringSearch* AcquireSearcher() {
    ICULockableSearcher& holder = Instance();
    holder.lock();
    return holder.searcher_;
  }

  // Flags the shared searcher as no longer in use.
  static void ReleaseSearcher() {
    ICULockableSearcher& holder = Instance();
    holder.unlock();
  }

 private:
  explicit ICULockableSearcher(UStringSearch* searcher) : searcher_(searcher) {}

  // Returns the one holder object; the ICU searcher is created the first time
  // this is called.
  static ICULockableSearcher& Instance() {
    static ICULockableSearcher searcher(CreateSearcher());
    return searcher;
  }

  // Asserts that the searcher is not already held, then marks it held.
  void lock() {
#if DCHECK_IS_ON()
    DCHECK(!locked_);
    locked_ = true;
#endif
  }

  // Asserts that the searcher is currently held, then marks it free.
  void unlock() {
#if DCHECK_IS_ON()
    DCHECK(locked_);
    locked_ = false;
#endif
  }

  UStringSearch* const searcher_ = nullptr;

#if DCHECK_IS_ON()
  // Only exists in DCHECK-enabled builds; backs the checks in lock()/unlock().
  bool locked_ = false;
#endif
};
} // namespace
// Grab the single global searcher; it is handed back in the destructor.
// If we ever have a reason to search more than one buffer at once, we'll
// have to move to multiple searchers.
TextSearcherICU::TextSearcherICU()
: searcher_(ICULockableSearcher::AcquireSearcher()) {}
TextSearcherICU::~TextSearcherICU() {
  // Before handing the shared searcher back, repoint both its pattern and its
  // text at a static one-character string so that it keeps referring to valid
  // memory. Otherwise, a later usearch_reset() would result in a
  // use-after-free error.
  const UChar* const placeholder = &kNewlineCharacter;
  SetPattern(placeholder, 1);
  SetText(placeholder, 1);

  ICULockableSearcher::ReleaseSearcher();
}
// Installs |pattern| as the string to search for.
//
// |pattern|        - the search string. Its 16-bit character buffer is passed
//                    straight to the searcher.
//                    NOTE(review): Characters16() is called unconditionally, so
//                    this presumably expects a 16-bit, non-empty pattern —
//                    confirm against callers.
// |case_sensitive| - forwarded to SetCaseSensitivity() to pick the collation
//                    strength before the pattern is set.
//
// When the pattern contains kana letters, its NFC-normalized form is stored in
// |normalized_search_text_| so that ShouldSkipCurrentMatch() can compare it
// with each candidate match.
void TextSearcherICU::SetPattern(const StringView& pattern,
                                 bool case_sensitive) {
  SetCaseSensitivity(case_sensitive);
  SetPattern(pattern.Characters16(), pattern.length());

  if (ContainsKanaLetters(pattern.ToString())) {
    NormalizeCharactersIntoNFCForm(pattern.Characters16(), pattern.length(),
                                   normalized_search_text_);
  } else {
    // Discard the normalized form of any earlier pattern. Leaving it in place
    // would make ShouldSkipCurrentMatch() filter matches of this pattern
    // against the previous pattern's kana text.
    normalized_search_text_.clear();
  }
}
void TextSearcherICU::SetText(const UChar* text, wtf_size_t length) {
UErrorCode status = U_ZERO_ERROR;
usearch_setText(searcher_, text, length, &status);
DCHECK_EQ(status, U_ZERO_ERROR);
text_length_ = length;
}
// Moves the searcher's current position within the text to |offset|.
void TextSearcherICU::SetOffset(wtf_size_t offset) {
  UErrorCode icu_status = U_ZERO_ERROR;
  usearch_setOffset(searcher_, offset, &icu_status);
  DCHECK_EQ(icu_status, U_ZERO_ERROR);
}
// Advances to the next match that passes ShouldSkipCurrentMatch() filtering.
// Returns true with |result| describing that match, or false once
// NextMatchResultInternal() reports that no further match exists.
bool TextSearcherICU::NextMatchResult(MatchResultICU& result) {
  for (;;) {
    if (!NextMatchResultInternal(result))
      return false;
    if (ShouldSkipCurrentMatch(result))
      continue;  // Rejected by the filter; try the following match.
    return true;
  }
}
// Asks the searcher for its next match, without any filtering.
// On success, fills |result| with the match start and length and returns
// true. When the reported start is negative or not inside the current text,
// zeroes |result| and returns false.
bool TextSearcherICU::NextMatchResultInternal(MatchResultICU& result) {
  UErrorCode icu_status = U_ZERO_ERROR;
  const int found_at = usearch_next(searcher_, &icu_status);
  DCHECK_EQ(icu_status, U_ZERO_ERROR);

  // TODO(iceman): It is possible to use |usearch_getText| function
  // to retrieve text length and not store it explicitly.
  const bool inside_text =
      found_at >= 0 && static_cast<wtf_size_t>(found_at) < text_length_;
  if (inside_text) {
    result.start = static_cast<wtf_size_t>(found_at);
    result.length = usearch_getMatchedLength(searcher_);
    return true;
  }

  // The only out-of-range value expected here is the "done" sentinel.
  DCHECK_EQ(found_at, USEARCH_DONE);
  result.start = 0;
  result.length = 0;
  return false;
}
// Decides whether the match described by |result| must be rejected.
// If no normalized search text is stored, nothing is rejected. Otherwise the
// matched span is NFC-normalized and the match is rejected when
// CheckOnlyKanaLettersInStrings() reports false for the stored normalized
// search text versus the normalized match.
bool TextSearcherICU::ShouldSkipCurrentMatch(MatchResultICU& result) const {
  if (normalized_search_text_.IsEmpty())
    return false;

  int32_t searched_length = 0;
  const UChar* const searched_text =
      usearch_getText(searcher_, &searched_length);
  DCHECK_LE(static_cast<int32_t>(result.start + result.length),
            searched_length);

  Vector<UChar> match_in_nfc;
  NormalizeCharactersIntoNFCForm(searched_text + result.start, result.length,
                                 match_in_nfc);

  const bool kana_agrees = CheckOnlyKanaLettersInStrings(
      normalized_search_text_.data(), normalized_search_text_.size(),
      match_in_nfc.begin(), match_in_nfc.size());
  return !kana_agrees;
}
void TextSearcherICU::SetPattern(const UChar* pattern, wtf_size_t length) {
UErrorCode status = U_ZERO_ERROR;
usearch_setPattern(searcher_, pattern, length, &status);
DCHECK_EQ(status, U_ZERO_ERROR);
}
void TextSearcherICU::SetCaseSensitivity(bool case_sensitive) {
const UCollationStrength strength =
case_sensitive ? UCOL_TERTIARY : UCOL_PRIMARY;
UCollator* const collator = usearch_getCollator(searcher_);
if (ucol_getStrength(collator) == strength)
return;
ucol_setStrength(collator, strength);
usearch_reset(searcher_);
}
} // namespace blink