446 lines
15 KiB
C++
446 lines
15 KiB
C++
// © 2016 and later: Unicode, Inc. and others.
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
/*
|
|
**********************************************************************
|
|
* Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
|
|
**********************************************************************
|
|
* Date Name Description
|
|
* 03/22/2000 helena Creation.
|
|
**********************************************************************
|
|
*/
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
|
|
|
|
#include "unicode/brkiter.h"
|
|
#include "unicode/schriter.h"
|
|
#include "unicode/search.h"
|
|
#include "usrchimp.h"
|
|
#include "cmemory.h"
|
|
|
|
// public constructors and destructors -----------------------------------
|
|
U_NAMESPACE_BEGIN
|
|
|
|
// Copy constructor.
// Allocates a private USearch record and fills it with the search settings,
// the last-match data and the iteration state of other. The break iterator
// pointers are copied as they are; nothing is cloned here.
SearchIterator::SearchIterator(const SearchIterator &other)
    : UObject(other)
{
    m_breakiterator_            = other.m_breakiterator_;
    m_text_                     = other.m_text_;
    // NOTE(review): the allocation result is not checked, and this
    // constructor has no UErrorCode through which a failure could be
    // reported.
    m_search_                   = (USearch *)uprv_malloc(sizeof(USearch));
    m_search_->breakIter        = other.m_search_->breakIter;
    m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
    m_search_->isOverlap        = other.m_search_->isOverlap;
    m_search_->elementComparisonType = other.m_search_->elementComparisonType;
    m_search_->matchedIndex     = other.m_search_->matchedIndex;
    m_search_->matchedLength    = other.m_search_->matchedLength;
    // next() and previous() read both flags, so they must not be left
    // uninitialized in the freshly allocated record.
    m_search_->isForwardSearching = other.m_search_->isForwardSearching;
    m_search_->reset            = other.m_search_->reset;
    // Refer to this object's own m_text_, as setText() and the other
    // constructors do, so the pointer does not depend on the lifetime of
    // other's string.
    m_search_->text             = m_text_.getBuffer();
    m_search_->textLength       = m_text_.length();
}
|
|
|
|
// Destructor: releases the USearch record, if one is still attached.
SearchIterator::~SearchIterator()
{
    if (m_search_ == NULL) {
        // nothing to release
        return;
    }
    uprv_free(m_search_);
}
|
|
|
|
// public get and set methods ----------------------------------------
|
|
|
|
// Sets one of the search attributes.
// attribute  the attribute to change
// value      the new value for that attribute
// status     in/out error code. An incoming failure is left untouched and
//            nothing is changed. It is set to U_ILLEGAL_ARGUMENT_ERROR when
//            attribute is not one of the handled attributes or when value is
//            USEARCH_ATTRIBUTE_VALUE_COUNT; in both cases no attribute is
//            modified.
void SearchIterator::setAttribute(USearchAttribute       attribute,
                                  USearchAttributeValue  value,
                                  UErrorCode            &status)
{
    if (U_FAILURE(status)) {
        // do not overwrite an error reported by an earlier call
        return;
    }
    // validate the value before touching any state
    if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    switch (attribute)
    {
    case USEARCH_OVERLAP :
        m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
        break;
    case USEARCH_CANONICAL_MATCH :
        m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
        break;
    case USEARCH_ELEMENT_COMPARISON :
        // only the two wildcard modes are stored as given; every other
        // value is stored as 0
        if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
            m_search_->elementComparisonType = (int16_t)value;
        } else {
            m_search_->elementComparisonType = 0;
        }
        break;
    default:
        status = U_ILLEGAL_ARGUMENT_ERROR;
        break;
    }
}
|
|
|
|
// Returns the current value of a search attribute.
// Attributes that are not handled here yield USEARCH_DEFAULT.
USearchAttributeValue SearchIterator::getAttribute(
                                          USearchAttribute attribute) const
{
    if (attribute == USEARCH_OVERLAP) {
        if (m_search_->isOverlap == TRUE) {
            return USEARCH_ON;
        }
        return USEARCH_OFF;
    }
    if (attribute == USEARCH_CANONICAL_MATCH) {
        if (m_search_->isCanonicalMatch == TRUE) {
            return USEARCH_ON;
        }
        return USEARCH_OFF;
    }
    if (attribute == USEARCH_ELEMENT_COMPARISON) {
        const int16_t stored = m_search_->elementComparisonType;
        // anything other than the two wildcard modes is reported as the
        // standard comparison
        const UBool isWildcardMode =
            (stored == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD ||
             stored == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD);
        if (isWildcardMode) {
            return (USearchAttributeValue)stored;
        }
        return USEARCH_STANDARD_ELEMENT_COMPARISON;
    }
    return USEARCH_DEFAULT;
}
|
|
|
|
// Returns the stored start index of the last match (m_search_->matchedIndex).
int32_t SearchIterator::getMatchedStart() const
{
    const int32_t start = m_search_->matchedIndex;
    return start;
}
|
|
|
|
// Returns the stored length of the last match (m_search_->matchedLength).
int32_t SearchIterator::getMatchedLength() const
{
    const int32_t length = m_search_->matchedLength;
    return length;
}
|
|
|
|
// Stores the text of the last match in result.
// result is emptied when there is no match (matchedIndex is USEARCH_DONE)
// or when the match has length 0.
void SearchIterator::getMatchedText(UnicodeString &result) const
{
    const int32_t start  = m_search_->matchedIndex;
    const int32_t length = m_search_->matchedLength;
    if (start == USEARCH_DONE || length == 0) {
        result.remove();
        return;
    }
    // the match is the run of length UChars beginning at text + start
    result.setTo(m_search_->text + start, length);
}
|
|
|
|
// Installs the break iterator used by this search.
// Nothing is changed when status already holds a failure.
void SearchIterator::setBreakIterator(BreakIterator *breakiter,
                                      UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }

    // The C++ object is remembered for getBreakIterator() ...
    m_breakiterator_ = breakiter;
    // ... and the same pointer is also stored in the USearch record, viewed
    // as a UBreakIterator. A UBreakIterator is a BreakIterator, so any
    // subclass of BreakIterator should work fine here.
    m_search_->breakIter = (UBreakIterator *) breakiter;
}
|
|
|
|
// Returns the break iterator pointer held by this object (may be NULL).
const BreakIterator * SearchIterator::getBreakIterator(void) const
{
    const BreakIterator *current = m_breakiterator_;
    return current;
}
|
|
|
|
void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
|
|
{
|
|
if (U_SUCCESS(status)) {
|
|
if (text.length() == 0) {
|
|
status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
}
|
|
else {
|
|
m_text_ = text;
|
|
m_search_->text = m_text_.getBuffer();
|
|
m_search_->textLength = m_text_.length();
|
|
}
|
|
}
|
|
}
|
|
|
|
// Replaces the text to be searched with the contents of a
// CharacterIterator.
// text    source of the new text
// status  in/out error code, handled as in setText(const UnicodeString &,
//         UErrorCode &): an empty text yields U_ILLEGAL_ARGUMENT_ERROR.
void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    // Extract into a temporary rather than straight into m_text_: if the
    // text is rejected, m_text_ must stay in step with the pointer and
    // length stored in m_search_.
    UnicodeString extracted;
    text.getText(extracted);
    setText(extracted, status);
}
|
|
|
|
// Returns a reference to the text held by this object (m_text_).
const UnicodeString & SearchIterator::getText(void) const
{
    const UnicodeString &current = m_text_;
    return current;
}
|
|
|
|
// operator overloading ----------------------------------------------
|
|
|
|
// Equality test.
// Two iterators compare equal when they hold the same break iterator
// pointer, the same attribute values, the same last-match data, the same
// offset, and texts of equal length and content.
UBool SearchIterator::operator==(const SearchIterator &that) const
{
    if (this == &that) {
        return TRUE;
    }
    if (m_breakiterator_ != that.m_breakiterator_) {
        return FALSE;
    }

    const USearch *mine   = m_search_;
    const USearch *theirs = that.m_search_;

    // settings
    if (mine->isCanonicalMatch != theirs->isCanonicalMatch) {
        return FALSE;
    }
    if (mine->isOverlap != theirs->isOverlap) {
        return FALSE;
    }
    if (mine->elementComparisonType != theirs->elementComparisonType) {
        return FALSE;
    }
    // last match
    if (mine->matchedIndex != theirs->matchedIndex) {
        return FALSE;
    }
    if (mine->matchedLength != theirs->matchedLength) {
        return FALSE;
    }
    // text length first, so the content comparison below covers both texts
    if (mine->textLength != theirs->textLength) {
        return FALSE;
    }
    if (getOffset() != that.getOffset()) {
        return FALSE;
    }
    return (uprv_memcmp(mine->text, theirs->text,
                        mine->textLength * sizeof(UChar)) == 0);
}
|
|
|
|
// public methods ----------------------------------------------------
|
|
|
|
// Moves to offset 0 and returns the result of handleNext() from there.
// Returns USEARCH_DONE without doing anything when status is a failure.
int32_t SearchIterator::first(UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        setOffset(0, status);
        return handleNext(0, status);
    }
    return USEARCH_DONE;
}
|
|
|
|
// Moves to position and returns the result of handleNext() from there.
// Returns USEARCH_DONE without doing anything when status is a failure.
int32_t SearchIterator::following(int32_t position,
                                  UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        setOffset(position, status);
        return handleNext(position, status);
    }
    return USEARCH_DONE;
}
|
|
|
|
// Moves to the end of the text and returns the result of handlePrev() from
// there. Returns USEARCH_DONE without doing anything when status is a
// failure.
int32_t SearchIterator::last(UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        setOffset(m_search_->textLength, status);
        return handlePrev(m_search_->textLength, status);
    }
    return USEARCH_DONE;
}
|
|
|
|
// Moves to position and returns the result of handlePrev() from there.
// Returns USEARCH_DONE without doing anything when status is a failure.
int32_t SearchIterator::preceding(int32_t position,
                                  UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        setOffset(position, status);
        return handlePrev(position, status);
    }
    return USEARCH_DONE;
}
|
|
|
|
// Advances to the next match in the forward direction.
// Returns the value produced by handleNext(), the index of the current
// match when the direction has just been switched from backward to forward,
// or USEARCH_DONE when status is a failure or the end of the text has been
// reached.
int32_t SearchIterator::next(UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }

    int32_t       position   = getOffset();
    const int32_t lastStart  = m_search_->matchedIndex;
    const int32_t lastLength = m_search_->matchedLength;
    m_search_->reset = FALSE;

    if (m_search_->isForwardSearching != TRUE) {
        // switching direction.
        // if matchedIndex == USEARCH_DONE, it means that either a
        // setOffset has been called or that previous ran off the text
        // string. the iterator would have been set to offset 0 if a
        // match is not found.
        m_search_->isForwardSearching = TRUE;
        if (lastStart != USEARCH_DONE) {
            // there's no need to set the collation element iterator
            // the next call to next will set the offset.
            return lastStart;
        }
    }
    else {
        const int32_t limit = m_search_->textLength;
        if (position == limit || lastStart == limit ||
            (lastStart != USEARCH_DONE &&
             lastStart + lastLength >= limit)) {
            // not enough characters to match
            setMatchNotFound();
            return USEARCH_DONE;
        }
    }

    // if lastLength is 0 we are at the start of the iteration; otherwise
    // step by one for overlapping matches, or past the whole previous match
    if (lastLength > 0) {
        position += (m_search_->isOverlap ? 1 : lastLength);
    }
    return handleNext(position, status);
}
|
|
|
|
// Moves to the previous match in the backward direction.
// Returns the value produced by handlePrev(), the index of the current
// match when the direction has just been switched from forward to backward,
// or USEARCH_DONE when status is a failure or the start of the text has
// been reached.
int32_t SearchIterator::previous(UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }

    int32_t position;
    if (!m_search_->reset) {
        position = getOffset();
    }
    else {
        // first call after a reset: start from the end of the text
        position                      = m_search_->textLength;
        m_search_->isForwardSearching = FALSE;
        m_search_->reset              = FALSE;
        setOffset(position, status);
    }

    int32_t lastStart = m_search_->matchedIndex;
    if (m_search_->isForwardSearching != TRUE) {
        if (position == 0 || lastStart == 0) {
            // not enough characters to match
            setMatchNotFound();
            return USEARCH_DONE;
        }
    }
    else {
        // switching direction.
        // if matchedIndex == USEARCH_DONE, it means that either a
        // setOffset has been called or that next ran off the text
        // string. the iterator would have been set to offset textLength if
        // a match is not found.
        m_search_->isForwardSearching = FALSE;
        if (lastStart != USEARCH_DONE) {
            return lastStart;
        }
    }

    if (lastStart == USEARCH_DONE) {
        // no previous match to anchor on; search back from the offset
        return handlePrev(position, status);
    }

    if (m_search_->isOverlap) {
        lastStart += m_search_->matchedLength - 2;
    }
    return handlePrev(lastStart, status);
}
|
|
|
|
void SearchIterator::reset()
|
|
{
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
setMatchNotFound();
|
|
setOffset(0, status);
|
|
m_search_->isOverlap = FALSE;
|
|
m_search_->isCanonicalMatch = FALSE;
|
|
m_search_->elementComparisonType = 0;
|
|
m_search_->isForwardSearching = TRUE;
|
|
m_search_->reset = TRUE;
|
|
}
|
|
|
|
// protected constructors and destructors -----------------------------
|
|
|
|
// Default constructor.
// Creates an iterator with no text, no break iterator, default attribute
// values and no current match.
SearchIterator::SearchIterator()
    : m_breakiterator_(NULL)
{
    USearch *search = (USearch *)uprv_malloc(sizeof(USearch));
    // text
    search->text                  = NULL;
    search->textLength            = 0;
    // attributes
    search->breakIter             = NULL;
    search->isOverlap             = FALSE;
    search->isCanonicalMatch      = FALSE;
    search->elementComparisonType = 0;
    // iteration state
    search->isForwardSearching    = TRUE;
    search->reset                 = TRUE;
    search->matchedIndex          = USEARCH_DONE;
    search->matchedLength         = 0;
    m_search_ = search;
}
|
|
|
|
// Constructor taking the text to search as a UnicodeString.
// text       the text to search; a copy is kept in m_text_
// breakiter  break iterator pointer to remember; stored as given
SearchIterator::SearchIterator(const UnicodeString &text,
                                     BreakIterator *breakiter) :
                                     m_breakiterator_(breakiter),
                                     m_text_(text)
{
    USearch *search = (USearch *)uprv_malloc(sizeof(USearch));
    // the record describes this object's own copy of the text
    search->text                  = m_text_.getBuffer();
    search->textLength            = text.length();
    // attributes
    search->breakIter             = NULL;
    search->isOverlap             = FALSE;
    search->isCanonicalMatch      = FALSE;
    search->elementComparisonType = 0;
    // iteration state
    search->isForwardSearching    = TRUE;
    search->reset                 = TRUE;
    search->matchedIndex          = USEARCH_DONE;
    search->matchedLength         = 0;
    m_search_ = search;
}
|
|
|
|
// Constructor taking the text to search from a CharacterIterator.
// text       source of the text; its contents are extracted into m_text_
// breakiter  break iterator pointer to remember; stored as given
SearchIterator::SearchIterator(CharacterIterator &text,
                               BreakIterator     *breakiter) :
                               m_breakiterator_(breakiter)
{
    USearch *search = (USearch *)uprv_malloc(sizeof(USearch));
    // attributes
    search->breakIter             = NULL;
    search->isOverlap             = FALSE;
    search->isCanonicalMatch      = FALSE;
    search->elementComparisonType = 0;
    // iteration state
    search->isForwardSearching    = TRUE;
    search->reset                 = TRUE;
    search->matchedIndex          = USEARCH_DONE;
    search->matchedLength         = 0;
    // pull the text out of the iterator, then describe the extracted copy
    text.getText(m_text_);
    search->text                  = m_text_.getBuffer();
    search->textLength            = m_text_.length();
    m_search_ = search;
}
|
|
|
|
// protected methods ------------------------------------------------------
|
|
|
|
// Assignment operator.
// Copies the text, the break iterator pointers, the attribute values, the
// last-match data and the iteration state of that into this object.
// Self-assignment is a no-op. Returns *this.
SearchIterator & SearchIterator::operator=(const SearchIterator &that)
{
    if (this != &that) {
        m_breakiterator_            = that.m_breakiterator_;
        m_text_                     = that.m_text_;
        m_search_->breakIter        = that.m_search_->breakIter;
        m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
        m_search_->isOverlap        = that.m_search_->isOverlap;
        m_search_->elementComparisonType = that.m_search_->elementComparisonType;
        m_search_->matchedIndex     = that.m_search_->matchedIndex;
        m_search_->matchedLength    = that.m_search_->matchedLength;
        // next() and previous() interpret the match data according to these
        // two flags, so they are copied together with it.
        m_search_->isForwardSearching = that.m_search_->isForwardSearching;
        m_search_->reset            = that.m_search_->reset;
        // Refer to this object's own m_text_, as setText() does, so the
        // pointer does not depend on the lifetime of that's string.
        m_search_->text             = m_text_.getBuffer();
        m_search_->textLength       = m_text_.length();
    }
    return *this;
}
|
|
|
|
// Records length as the length of the current match.
void SearchIterator::setMatchLength(int32_t length)
{
    USearch *search = m_search_;
    search->matchedLength = length;
}
|
|
|
|
// Records position as the start index of the current match.
void SearchIterator::setMatchStart(int32_t position)
{
    USearch *search = m_search_;
    search->matchedIndex = position;
}
|
|
|
|
void SearchIterator::setMatchNotFound()
|
|
{
|
|
setMatchStart(USEARCH_DONE);
|
|
setMatchLength(0);
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
// by default no errors should be returned here since offsets are within
|
|
// range.
|
|
if (m_search_->isForwardSearching) {
|
|
setOffset(m_search_->textLength, status);
|
|
}
|
|
else {
|
|
setOffset(0, status);
|
|
}
|
|
}
|
|
|
|
|
|
U_NAMESPACE_END
|
|
|
|
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
|