icu/source/i18n/search.cpp

446 lines
15 KiB
C++

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
**********************************************************************
* Date Name Description
* 03/22/2000 helena Creation.
**********************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
#include "unicode/brkiter.h"
#include "unicode/schriter.h"
#include "unicode/search.h"
#include "usrchimp.h"
#include "cmemory.h"
// public constructors and destructors -----------------------------------
U_NAMESPACE_BEGIN
/**
 * Copy constructor.
 *
 * Allocates a fresh USearch state block for this object and copies the
 * search configuration, the current match and the iteration state from
 * other. The break iterator pointer is copied as-is, so both objects
 * refer to the same BreakIterator afterwards.
 *
 * @param other iterator to copy from
 */
SearchIterator::SearchIterator(const SearchIterator &other)
    : UObject(other)
{
    m_breakiterator_ = other.m_breakiterator_;
    m_text_          = other.m_text_;
    m_search_        = (USearch *)uprv_malloc(sizeof(USearch));

    m_search_->breakIter             = other.m_search_->breakIter;
    m_search_->isCanonicalMatch      = other.m_search_->isCanonicalMatch;
    m_search_->isOverlap             = other.m_search_->isOverlap;
    m_search_->elementComparisonType = other.m_search_->elementComparisonType;
    m_search_->matchedIndex          = other.m_search_->matchedIndex;
    m_search_->matchedLength         = other.m_search_->matchedLength;

    // next() and previous() read both of these flags; nothing else in this
    // constructor assigns them, so they have to be carried over explicitly.
    m_search_->isForwardSearching    = other.m_search_->isForwardSearching;
    m_search_->reset                 = other.m_search_->reset;

    // Reference this object's own copy of the text, exactly as the other
    // constructors and setText() do. Copying other.m_search_->text would
    // leave this object pointing into other's string, which becomes stale
    // as soon as other changes its text or is destroyed.
    m_search_->text                  = m_text_.getBuffer();
    m_search_->textLength            = m_text_.length();
}
/**
 * Destructor. Frees the USearch state block referenced by m_search_,
 * provided the pointer is not NULL.
 */
SearchIterator::~SearchIterator()
{
    if (m_search_ == NULL) {
        return;
    }
    uprv_free(m_search_);
}
// public get and set methods ----------------------------------------
/**
 * Sets one of the search attributes.
 *
 * @param attribute attribute to change; anything other than
 *                  USEARCH_OVERLAP, USEARCH_CANONICAL_MATCH or
 *                  USEARCH_ELEMENT_COMPARISON sets status to
 *                  U_ILLEGAL_ARGUMENT_ERROR
 * @param value     new value. For the two on/off attributes every value
 *                  other than USEARCH_ON switches the attribute off. For
 *                  USEARCH_ELEMENT_COMPARISON every value other than the
 *                  two wildcard modes stores 0.
 * @param status    in/out error code. Nothing is stored when it already
 *                  signals a failure on entry.
 */
void SearchIterator::setAttribute(USearchAttribute attribute,
                                  USearchAttributeValue value,
                                  UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        if (attribute == USEARCH_OVERLAP) {
            if (value == USEARCH_ON) {
                m_search_->isOverlap = TRUE;
            } else {
                m_search_->isOverlap = FALSE;
            }
        } else if (attribute == USEARCH_CANONICAL_MATCH) {
            if (value == USEARCH_ON) {
                m_search_->isCanonicalMatch = TRUE;
            } else {
                m_search_->isCanonicalMatch = FALSE;
            }
        } else if (attribute == USEARCH_ELEMENT_COMPARISON) {
            if (value != USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD &&
                value != USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
                m_search_->elementComparisonType = 0;
            } else {
                m_search_->elementComparisonType = (int16_t)value;
            }
        } else {
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
    }

    // The count sentinel is rejected independently of the incoming status
    // and after the attribute handling above has already run.
    if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
}
/**
 * Reads back the current value of a search attribute.
 *
 * @param attribute attribute to query
 * @return USEARCH_ON or USEARCH_OFF for the overlap and canonical-match
 *         attributes; the stored wildcard mode or
 *         USEARCH_STANDARD_ELEMENT_COMPARISON for the element comparison
 *         attribute; USEARCH_DEFAULT for any other attribute
 */
USearchAttributeValue SearchIterator::getAttribute(
                                          USearchAttribute attribute) const
{
    if (attribute == USEARCH_OVERLAP) {
        if (m_search_->isOverlap == TRUE) {
            return USEARCH_ON;
        }
        return USEARCH_OFF;
    }

    if (attribute == USEARCH_CANONICAL_MATCH) {
        if (m_search_->isCanonicalMatch == TRUE) {
            return USEARCH_ON;
        }
        return USEARCH_OFF;
    }

    if (attribute == USEARCH_ELEMENT_COMPARISON) {
        const int16_t stored = m_search_->elementComparisonType;
        const bool isWildcardMode =
            stored == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD ||
            stored == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD;
        if (!isWildcardMode) {
            return USEARCH_STANDARD_ELEMENT_COMPARISON;
        }
        return (USearchAttributeValue)stored;
    }

    return USEARCH_DEFAULT;
}
/**
 * Reports where the current match starts.
 *
 * @return the matchedIndex held in the search state; setMatchNotFound()
 *         stores USEARCH_DONE there
 */
int32_t SearchIterator::getMatchedStart() const
{
    const int32_t matchStart = m_search_->matchedIndex;
    return matchStart;
}
/**
 * Reports the length of the current match.
 *
 * @return the matchedLength held in the search state; setMatchNotFound()
 *         stores 0 there
 */
int32_t SearchIterator::getMatchedLength() const
{
    const int32_t matchLength = m_search_->matchedLength;
    return matchLength;
}
/**
 * Copies the text of the current match into result.
 *
 * @param result receives the matched text; it is emptied when the stored
 *               match index is USEARCH_DONE or the stored match length
 *               is 0
 */
void SearchIterator::getMatchedText(UnicodeString &result) const
{
    const int32_t start  = m_search_->matchedIndex;
    const int32_t length = m_search_->matchedLength;

    if (start == USEARCH_DONE || length == 0) {
        result.remove();
        return;
    }
    result.setTo(m_search_->text + start, length);
}
/**
 * Installs the break iterator used by this search.
 *
 * @param breakiter break iterator to use; the pointer is stored, the
 *                  object is not copied
 * @param status    in/out error code; nothing is changed when it already
 *                  signals a failure
 */
void SearchIterator::setBreakIterator(BreakIterator *breakiter,
                                      UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }

    // The pointer is also handed to the C-level search state. Per the
    // original authors' note, the constructors that take a BreakIterator
    // perform the same cast before calling the usearch_openFromXXX
    // routines, and a UBreakIterator is a BreakIterator, so any
    // BreakIterator subclass should work here.
    m_search_->breakIter = (UBreakIterator *) breakiter;
    m_breakiterator_     = breakiter;
}
/**
 * Returns the break iterator pointer currently stored in this object.
 *
 * @return the stored BreakIterator pointer; the constructors in this file
 *         store either NULL or the pointer passed to them
 */
const BreakIterator * SearchIterator::getBreakIterator(void) const
{
    const BreakIterator *current = m_breakiterator_;
    return current;
}
void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
{
if (U_SUCCESS(status)) {
if (text.length() == 0) {
status = U_ILLEGAL_ARGUMENT_ERROR;
}
else {
m_text_ = text;
m_search_->text = m_text_.getBuffer();
m_search_->textLength = m_text_.length();
}
}
}
/**
 * Replaces the text to be searched with the text of a CharacterIterator.
 *
 * The iterator's text is first extracted into m_text_ and then passed to
 * the UnicodeString overload of setText().
 *
 * @param text   iterator supplying the new target text
 * @param status in/out error code; nothing is done when it already
 *               signals a failure
 */
void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    text.getText(m_text_);
    setText(m_text_, status);
}
/**
 * Gives read-only access to this object's copy of the text being
 * searched.
 *
 * @return reference to the internally held text
 */
const UnicodeString & SearchIterator::getText(void) const
{
    const UnicodeString &heldText = m_text_;
    return heldText;
}
// operator overloading ----------------------------------------------
/**
 * Equality comparison.
 *
 * Two iterators compare equal when they are the same object, or when they
 * share the same break iterator pointer and agree on the canonical-match,
 * overlap and element-comparison settings, the current match (index and
 * length), the text length, the current offset and the text contents.
 *
 * @param that iterator to compare against
 * @return TRUE when equal, FALSE otherwise
 */
UBool SearchIterator::operator==(const SearchIterator &that) const
{
    if (this == &that) {
        return TRUE;
    }

    const USearch *mine   = m_search_;
    const USearch *theirs = that.m_search_;

    // Pointer and scalar state first.
    if (m_breakiterator_ != that.m_breakiterator_ ||
        mine->isCanonicalMatch != theirs->isCanonicalMatch ||
        mine->isOverlap != theirs->isOverlap ||
        mine->elementComparisonType != theirs->elementComparisonType ||
        mine->matchedIndex != theirs->matchedIndex ||
        mine->matchedLength != theirs->matchedLength ||
        mine->textLength != theirs->textLength) {
        return FALSE;
    }

    if (getOffset() != that.getOffset()) {
        return FALSE;
    }

    // Lengths are known to be equal here, so one length covers both texts.
    return uprv_memcmp(mine->text, theirs->text,
                       mine->textLength * sizeof(UChar)) == 0;
}
// public methods ----------------------------------------------------
/**
 * Searches forward from the start of the text.
 *
 * Sets the offset to 0 and returns whatever handleNext() reports for
 * position 0.
 *
 * @param status in/out error code
 * @return result of handleNext(), or USEARCH_DONE when status already
 *         signals a failure on entry
 */
int32_t SearchIterator::first(UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        setOffset(0, status);
        return handleNext(0, status);
    }
    return USEARCH_DONE;
}
/**
 * Searches forward from a given position.
 *
 * Sets the offset to position and returns whatever handleNext() reports
 * for that position.
 *
 * @param position offset at which the forward search starts
 * @param status   in/out error code
 * @return result of handleNext(), or USEARCH_DONE when status already
 *         signals a failure on entry
 */
int32_t SearchIterator::following(int32_t position,
                                  UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        setOffset(position, status);
        return handleNext(position, status);
    }
    return USEARCH_DONE;
}
/**
 * Searches backward from the end of the text.
 *
 * Sets the offset to the text length and returns whatever handlePrev()
 * reports for that position.
 *
 * @param status in/out error code
 * @return result of handlePrev(), or USEARCH_DONE when status already
 *         signals a failure on entry
 */
int32_t SearchIterator::last(UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        setOffset(m_search_->textLength, status);
        return handlePrev(m_search_->textLength, status);
    }
    return USEARCH_DONE;
}
/**
 * Searches backward from a given position.
 *
 * Sets the offset to position and returns whatever handlePrev() reports
 * for that position.
 *
 * @param position offset at which the backward search starts
 * @param status   in/out error code
 * @return result of handlePrev(), or USEARCH_DONE when status already
 *         signals a failure on entry
 */
int32_t SearchIterator::preceding(int32_t position,
                                  UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        setOffset(position, status);
        return handlePrev(position, status);
    }
    return USEARCH_DONE;
}
/**
 * Advances to the next match in the forward direction.
 *
 * @param status in/out error code
 * @return index of the match, or USEARCH_DONE when status already signals
 *         a failure or no further match can exist
 */
int32_t SearchIterator::next(UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }

    int32_t       position   = getOffset();
    const int32_t lastStart  = m_search_->matchedIndex;
    const int32_t lastLength = m_search_->matchedLength;
    m_search_->reset = FALSE;

    if (m_search_->isForwardSearching != TRUE) {
        // Switching direction.
        // A matchedIndex of USEARCH_DONE means that either setOffset has
        // been called or that previous ran off the text string; the
        // iterator would have been set to offset 0 if a match is not
        // found.
        m_search_->isForwardSearching = TRUE;
        if (m_search_->matchedIndex != USEARCH_DONE) {
            // No need to set the collation element iterator here; the
            // next call to next will set the offset.
            return lastStart;
        }
    }
    else {
        const int32_t textEnd = m_search_->textLength;
        if (position == textEnd || lastStart == textEnd ||
            (lastStart != USEARCH_DONE &&
             lastStart + lastLength >= textEnd)) {
            // Not enough characters left to match.
            setMatchNotFound();
            return USEARCH_DONE;
        }
    }

    // A match length of 0 means we are at the start of the iteration, so
    // the offset is used unchanged.
    if (lastLength > 0) {
        position += m_search_->isOverlap ? 1 : lastLength;
    }
    return handleNext(position, status);
}
// Moves to the previous match, searching backward.
//
// status is an in/out error code. Returns USEARCH_DONE when status already
// signals a failure on entry or when the iterator is already at the start
// of the text; otherwise returns either the current match index (when the
// search direction has just been switched) or the result of handlePrev().
int32_t SearchIterator::previous(UErrorCode &status)
{
if (U_SUCCESS(status)) {
int32_t offset;
// After a reset, backward iteration starts from the end of the text.
if (m_search_->reset) {
offset = m_search_->textLength;
m_search_->isForwardSearching = FALSE;
m_search_->reset = FALSE;
setOffset(offset, status);
}
else {
offset = getOffset();
}
// Read after the reset handling above, which calls setOffset().
int32_t matchindex = m_search_->matchedIndex;
if (m_search_->isForwardSearching == TRUE) {
// switching direction.
// if matchedIndex == USEARCH_DONE, it means that either a
// setOffset has been called or that next ran off the text
// string. the iterator would have been set to offset textLength if
// a match is not found.
m_search_->isForwardSearching = FALSE;
// The current match is reported once more when the direction changes.
if (matchindex != USEARCH_DONE) {
return matchindex;
}
}
else {
if (offset == 0 || matchindex == 0) {
// not enough characters to match
setMatchNotFound();
return USEARCH_DONE;
}
}
// With a current match, the backward search restarts relative to that
// match rather than from the offset.
if (matchindex != USEARCH_DONE) {
if (m_search_->isOverlap) {
// NOTE(review): in overlap mode the restart position becomes
// matchindex + matchedLength - 2; the reason for the constant 2 is
// not visible in this file -- confirm against handlePrev().
matchindex += m_search_->matchedLength - 2;
}
return handlePrev(matchindex, status);
}
return handlePrev(offset, status);
}
return USEARCH_DONE;
}
void SearchIterator::reset()
{
UErrorCode status = U_ZERO_ERROR;
setMatchNotFound();
setOffset(0, status);
m_search_->isOverlap = FALSE;
m_search_->isCanonicalMatch = FALSE;
m_search_->elementComparisonType = 0;
m_search_->isForwardSearching = TRUE;
m_search_->reset = TRUE;
}
// protected constructors and destructors -----------------------------
/**
 * Default constructor.
 *
 * Allocates the USearch state block and fills it with defaults: no break
 * iterator, overlap and canonical matching off, element comparison type 0,
 * forward searching, reset pending, no current match and no text.
 */
SearchIterator::SearchIterator()
    : m_breakiterator_(NULL)
{
    USearch *state = static_cast<USearch *>(uprv_malloc(sizeof(USearch)));

    // No text yet.
    state->text                  = NULL;
    state->textLength            = 0;

    // No match yet.
    state->matchedIndex          = USEARCH_DONE;
    state->matchedLength         = 0;

    // Default configuration and iteration state.
    state->breakIter             = NULL;
    state->isOverlap             = FALSE;
    state->isCanonicalMatch      = FALSE;
    state->elementComparisonType = 0;
    state->isForwardSearching    = TRUE;
    state->reset                 = TRUE;

    m_search_ = state;
}
/**
 * Constructs an iterator over a copy of the given text.
 *
 * The search state starts with the same defaults as the default
 * constructor, except that it references this object's copy of the text.
 * The break iterator pointer is stored in m_breakiterator_ only; the
 * breakIter field of the search state is set to NULL.
 *
 * @param text      text to search; copied into this object
 * @param breakiter break iterator pointer to store
 */
SearchIterator::SearchIterator(const UnicodeString &text,
                               BreakIterator *breakiter) :
    m_breakiterator_(breakiter),
    m_text_(text)
{
    USearch *state = static_cast<USearch *>(uprv_malloc(sizeof(USearch)));

    // Reference the private copy of the text, not the caller's string.
    state->text                  = m_text_.getBuffer();
    state->textLength            = m_text_.length();

    // No match yet.
    state->matchedIndex          = USEARCH_DONE;
    state->matchedLength         = 0;

    // Default configuration and iteration state.
    state->breakIter             = NULL;
    state->isOverlap             = FALSE;
    state->isCanonicalMatch      = FALSE;
    state->elementComparisonType = 0;
    state->isForwardSearching    = TRUE;
    state->reset                 = TRUE;

    m_search_ = state;
}
/**
 * Constructs an iterator over the text supplied by a CharacterIterator.
 *
 * The iterator's text is extracted into this object's own string, and the
 * search state references that string. All other search state fields
 * start with the same defaults as in the default constructor. The break
 * iterator pointer is stored in m_breakiterator_ only; the breakIter
 * field of the search state is set to NULL.
 *
 * @param text      iterator supplying the text to search
 * @param breakiter break iterator pointer to store
 */
SearchIterator::SearchIterator(CharacterIterator &text,
                               BreakIterator *breakiter) :
    m_breakiterator_(breakiter)
{
    USearch *state = static_cast<USearch *>(uprv_malloc(sizeof(USearch)));

    // Default configuration and iteration state.
    state->breakIter             = NULL;
    state->isOverlap             = FALSE;
    state->isCanonicalMatch      = FALSE;
    state->elementComparisonType = 0;
    state->isForwardSearching    = TRUE;
    state->reset                 = TRUE;

    // No match yet.
    state->matchedIndex          = USEARCH_DONE;
    state->matchedLength         = 0;

    // Pull the text out of the iterator, then reference the private copy.
    text.getText(m_text_);
    state->text                  = m_text_.getBuffer();
    state->textLength            = m_text_.length();

    m_search_ = state;
}
// protected methods ------------------------------------------------------
/**
 * Assignment operator.
 *
 * Copies the text, the break iterator pointer, the search configuration,
 * the current match and the iteration state from that into this object's
 * existing USearch state block. Self-assignment is a no-op.
 *
 * @param that iterator to copy from
 * @return reference to this object
 */
SearchIterator & SearchIterator::operator=(const SearchIterator &that)
{
    if (this != &that) {
        m_breakiterator_ = that.m_breakiterator_;
        m_text_          = that.m_text_;

        m_search_->breakIter             = that.m_search_->breakIter;
        m_search_->isCanonicalMatch      = that.m_search_->isCanonicalMatch;
        m_search_->isOverlap             = that.m_search_->isOverlap;
        m_search_->elementComparisonType = that.m_search_->elementComparisonType;
        m_search_->matchedIndex          = that.m_search_->matchedIndex;
        m_search_->matchedLength         = that.m_search_->matchedLength;

        // The direction and reset flags belong to the copied match state:
        // next() and previous() interpret matchedIndex differently
        // depending on them, so they must travel together.
        m_search_->isForwardSearching    = that.m_search_->isForwardSearching;
        m_search_->reset                 = that.m_search_->reset;

        // Reference this object's own copy of the text, exactly as
        // setText() does. Copying that.m_search_->text would leave this
        // object pointing into that's string, which becomes stale as soon
        // as that changes its text or is destroyed.
        m_search_->text                  = m_text_.getBuffer();
        m_search_->textLength            = m_text_.length();
    }
    return *this;
}
/**
 * Records the length of the current match in the search state.
 *
 * @param length match length to store
 */
void SearchIterator::setMatchLength(int32_t length)
{
    USearch *state = m_search_;
    state->matchedLength = length;
}
/**
 * Records the start index of the current match in the search state.
 *
 * @param position match start index to store
 */
void SearchIterator::setMatchStart(int32_t position)
{
    USearch *state = m_search_;
    state->matchedIndex = position;
}
void SearchIterator::setMatchNotFound()
{
setMatchStart(USEARCH_DONE);
setMatchLength(0);
UErrorCode status = U_ZERO_ERROR;
// by default no errors should be returned here since offsets are within
// range.
if (m_search_->isForwardSearching) {
setOffset(m_search_->textLength, status);
}
else {
setOffset(0, status);
}
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */