// icu/source/i18n/collationsettings.cpp
// 378 lines, 13 KiB, C++

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2013-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* collationsettings.cpp
*
* created on: 2013feb07
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_COLLATION
#include "unicode/ucol.h"
#include "cmemory.h"
#include "collation.h"
#include "collationdata.h"
#include "collationsettings.h"
#include "sharedobject.h"
#include "uassert.h"
#include "umutex.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
// Copy constructor.
// Scalar settings are copied in the initializer list. The reordering members
// start out as "nothing stored, nothing owned" and are then filled in by
// copyReorderingFrom().
CollationSettings::CollationSettings(const CollationSettings &other)
        : SharedObject(other),
          options(other.options), variableTop(other.variableTop),
          reorderTable(NULL),
          minHighNoReorder(other.minHighNoReorder),
          reorderRanges(NULL), reorderRangesLength(0),
          reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
          fastLatinOptions(other.fastLatinOptions) {
    // NOTE(review): a constructor cannot report an error, so the status of the
    // reordering copy is not inspected after the call.
    UErrorCode status = U_ZERO_ERROR;
    copyReorderingFrom(other, status);

    // The fast Latin primaries are copied only for non-negative fastLatinOptions;
    // otherwise the array is left as it is.
    if(fastLatinOptions < 0) { return; }
    uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
}
// Destructor.
// reorderCodes is freed only when reorderCodesCapacity is non-zero; with a zero
// capacity the pointer is either NULL or an alias to memory that is not freed here.
CollationSettings::~CollationSettings() {
    if(reorderCodesCapacity == 0) { return; }
    uprv_free(const_cast<int32_t *>(reorderCodes));
}
// Compares two settings objects.
// They are equal when the options match, the variableTop values match (checked
// only if some ALTERNATE_MASK bit is set in options), and the reorder code
// lists have the same length and contents.
UBool
CollationSettings::operator==(const CollationSettings &other) const {
    const bool variableTopMatters = (options & ALTERNATE_MASK) != 0;
    if(options != other.options ||
            (variableTopMatters && variableTop != other.variableTop) ||
            reorderCodesLength != other.reorderCodesLength) {
        return FALSE;
    }
    // Same length on both sides: walk the two code lists in parallel.
    const int32_t *mine = reorderCodes;
    const int32_t *theirs = other.reorderCodes;
    for(int32_t remaining = reorderCodesLength; remaining > 0; --remaining) {
        if(*mine++ != *theirs++) { return FALSE; }
    }
    return TRUE;
}
// Returns a hash code built from the same fields that operator==() compares:
// the options, variableTop (only if some ALTERNATE_MASK bit is set in options),
// and the list of reorder codes.
//
// All mixing is done in unsigned 32-bit arithmetic and the per-code shift count
// is reduced to 0..31. Shifting a 32-bit value by 32 or more bits, or shifting a
// signed value so that it overflows, is undefined behavior, which the list
// index i would otherwise trigger for long reorder code lists.
// For the inputs where the previous signed computation was well-defined,
// the resulting bit pattern is unchanged.
int32_t
CollationSettings::hashCode() const {
    uint32_t h = static_cast<uint32_t>(options) << 8;
    if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
    h ^= static_cast<uint32_t>(reorderCodesLength);
    for(int32_t i = 0; i < reorderCodesLength; ++i) {
        // Vary the shift with the position so that permutations of the same
        // codes tend to hash differently; keep the count within the type width.
        h ^= static_cast<uint32_t>(reorderCodes[i]) << (i & 0x1f);
    }
    return static_cast<int32_t>(h);
}
// Turns reordering off.
// reorderCodes and reorderCodesCapacity are deliberately left untouched so that
// an owned buffer stays available for a later setReorderArrays() call.
void
CollationSettings::resetReordering() {
    reorderCodesLength = 0;
    reorderRangesLength = 0;
    minHighNoReorder = 0;
    // "No reordering" is represented by a NULL permutation table,
    // not by a table that maps every byte to itself.
    reorderTable = NULL;
}
// Adopts caller-provided reordering arrays without copying them.
// Only the pointers are stored, so codes, ranges and table have to stay valid
// for as long as this object uses them.
//
// The arrays are accepted when table is not NULL and either
// - there are no ranges and the table has no split bytes, or
// - there are at least two ranges, the first one has a zero offset (low 16 bits)
//   and the last one has a non-zero offset.
// Otherwise the data is regenerated from the codes via setReordering().
// Does nothing if errorCode already indicates a failure.
void
CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
                                   const uint32_t *ranges, int32_t rangesLength,
                                   const uint8_t *table, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }

    bool usable = false;
    if(table != NULL) {
        if(rangesLength == 0) {
            usable = !reorderTableHasSplitBytes(table);
        } else {
            // The first offset must be 0. The last offset must not be 0.
            usable = rangesLength >= 2 &&
                    (ranges[0] & 0xffff) == 0 &&
                    (ranges[rangesLength - 1] & 0xffff) != 0;
        }
    }
    if(!usable) {
        // Missing or inconsistent data: rebuild everything from the codes.
        setReordering(data, codes, length, errorCode);
        return;
    }

    // Release an owned buffer before the pointer is overwritten with the alias.
    if(reorderCodesCapacity != 0) {
        uprv_free(const_cast<int32_t *>(reorderCodes));
        reorderCodesCapacity = 0;
    }
    reorderTable = table;
    reorderCodes = codes;
    reorderCodesLength = length;

    // Skip the leading ranges whose limit has a zero second byte: the table
    // already reorders them, and fewer ranges make the remaining lookups faster.
    int32_t skipped = 0;
    for(; skipped < rangesLength; ++skipped) {
        if((ranges[skipped] & 0xff0000) != 0) { break; }
    }
    if(skipped < rangesLength) {
        // The lead byte of the first remaining limit must be marked as split.
        U_ASSERT(table[ranges[skipped] >> 24] == 0);
        minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
        reorderRanges = ranges + skipped;
        reorderRangesLength = rangesLength - skipped;
    } else {
        // No range is left: the table alone describes the reordering.
        U_ASSERT(!reorderTableHasSplitBytes(table));
        minHighNoReorder = 0;
        reorderRanges = NULL;
        reorderRangesLength = 0;
    }
}
// Builds this object's reordering data from a list of reorder codes.
// Obtains (limit, offset) range pairs from data.makeReorderRanges(), derives the
// 256-entry lead byte permutation table from them, and stores copies of the
// codes, the still-needed ranges and the table via setReorderArrays().
// An empty code list, the single code UCOL_REORDER_CODE_NONE, or an empty
// range list turns reordering off via resetReordering().
// Does nothing if errorCode already indicates a failure; returns early without
// changing the reordering if makeReorderRanges() fails.
void
CollationSettings::setReordering(const CollationData &data,
const int32_t *codes, int32_t codesLength,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return; }
if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
resetReordering();
return;
}
UVector32 rangesList(errorCode);
data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
if(U_FAILURE(errorCode)) { return; }
int32_t rangesLength = rangesList.size();
if(rangesLength == 0) {
resetReordering();
return;
}
// The vector's elements are reinterpreted as unsigned 32-bit pairs:
// upper 16 bits = primary limit, lower 16 bits = offset.
const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
// ranges[] contains at least two (limit, offset) pairs.
// The first offset must be 0. The last offset must not be 0.
// Separators (at the low end) and trailing weights (at the high end)
// are never reordered.
U_ASSERT(rangesLength >= 2);
U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
// Primaries at or above the last limit are returned unchanged by reorderEx().
minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
// Write the lead byte permutation table.
// Set a 0 for each lead byte that has a range boundary in the middle.
uint8_t table[256];
// b is the next lead byte whose table entry has not been written yet.
int32_t b = 0;
// Index of the first range whose limit falls inside a lead byte, or -1 if none.
int32_t firstSplitByteRangeIndex = -1;
for(int32_t i = 0; i < rangesLength; ++i) {
uint32_t pair = ranges[i];
// Lead byte of this range's limit.
int32_t limit1 = (int32_t)(pair >> 24);
while(b < limit1) {
// The cast keeps only the low 8 bits of the sum,
// that is, b plus the low byte of the pair's offset.
table[b] = (uint8_t)(b + pair);
++b;
}
// Check the second byte of the limit.
if((pair & 0xff0000) != 0) {
// The range ends in the middle of lead byte limit1:
// mark that byte as split and continue after it.
table[limit1] = 0;
b = limit1 + 1;
if(firstSplitByteRangeIndex < 0) {
firstSplitByteRangeIndex = i;
}
}
}
// Lead bytes above the last range map to themselves.
while(b <= 0xff) {
table[b] = (uint8_t)b;
++b;
}
if(firstSplitByteRangeIndex < 0) {
// The lead byte permutation table alone suffices for reordering.
rangesLength = 0;
} else {
// Remove the ranges below the first split byte.
ranges += firstSplitByteRangeIndex;
rangesLength -= firstSplitByteRangeIndex;
}
// Copies the codes, the remaining ranges and the stack-allocated table.
setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
}
// Stores copies of the reorder codes, the reorder ranges and the 256-byte
// permutation table in a single memory block referenced by reorderCodes.
// Block layout: reorderCodesCapacity 32-bit slots (codes first, ranges directly
// after them, unused slots last), followed by the 256 table bytes.
// The current block is reused when codesLength + rangesLength fits into
// reorderCodesCapacity; otherwise a new block is allocated and a previously
// owned one is freed.
// On allocation failure, calls resetReordering() and sets errorCode to
// U_MEMORY_ALLOCATION_ERROR.
// Does nothing if errorCode already indicates a failure.
void
CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
const uint32_t *ranges, int32_t rangesLength,
const uint8_t *table, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return; }
int32_t *ownedCodes;
int32_t totalLength = codesLength + rangesLength;
U_ASSERT(totalLength > 0);
if(totalLength <= reorderCodesCapacity) {
// The existing block is large enough; write into it.
ownedCodes = const_cast<int32_t *>(reorderCodes);
} else {
// Allocate one memory block for the codes, the ranges, and the 16-aligned table.
int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints
// capacity 4-byte slots plus 256 bytes for the table.
ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256);
if(ownedCodes == NULL) {
resetReordering();
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
// Free the old block only after the new allocation has succeeded.
if(reorderCodesCapacity != 0) {
uprv_free(const_cast<int32_t *>(reorderCodes));
}
reorderCodes = ownedCodes;
reorderCodesCapacity = capacity;
}
// The table goes right after the reorderCodesCapacity slots.
uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
// Lengths are counted in 32-bit units, hence the factor 4 for the byte counts.
uprv_memcpy(ownedCodes, codes, codesLength * 4);
uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
reorderCodesLength = codesLength;
// The ranges share the block with the codes and start right after them.
reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
reorderRangesLength = rangesLength;
}
// Makes this object's reordering data equal to other's.
// - other has no reordering: reordering is turned off here as well.
// - other owns its arrays (non-zero capacity): the arrays are copied.
// - other aliases its arrays (zero capacity): the same pointers are shared.
// Does nothing if errorCode already indicates a failure.
void
CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    if(!other.hasReordering()) {
        resetReordering();
        return;
    }
    minHighNoReorder = other.minHighNoReorder;
    if(other.reorderCodesCapacity != 0) {
        // Owned by other: this object needs its own copy.
        setReorderArrays(other.reorderCodes, other.reorderCodesLength,
                         other.reorderRanges, other.reorderRangesLength,
                         other.reorderTable, errorCode);
        return;
    }
    // The reorder arrays are aliased to memory-mapped data; share the pointers.
    reorderCodes = other.reorderCodes;
    reorderCodesLength = other.reorderCodesLength;
    reorderRanges = other.reorderRanges;
    reorderRangesLength = other.reorderRangesLength;
    reorderTable = other.reorderTable;
}
// Returns TRUE if any entry of the 256-byte permutation table other than the
// first one is 0. The first entry is asserted to be 0 and is not counted.
UBool
CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
    U_ASSERT(table[0] == 0);
    const uint8_t *entry = table + 1;
    const uint8_t *const tableLimit = table + 256;
    while(entry != tableLimit) {
        if(*entry++ == 0) { return TRUE; }
    }
    return FALSE;
}
// Reorders the primary weight p using the reorderRanges list.
// Primaries at or above minHighNoReorder are returned unchanged.
// Otherwise the first (limit, offset) pair greater than (p | 0xffff) is looked
// up and the low byte of that pair is added to the lead byte of p.
// The scan has no length check: it relies on the list containing a pair above
// every primary below minHighNoReorder (elsewhere in this file
// minHighNoReorder is taken from the upper 16 bits of the last pair).
uint32_t
CollationSettings::reorderEx(uint32_t p) const {
    if(p >= minHighNoReorder) { return p; }
    // Set the lower 16 bits of p so that they are >= any offset bits;
    // the result can then be compared directly with the pairs.
    const uint32_t roundedUp = p | 0xffff;
    const uint32_t *range = reorderRanges;
    while(roundedUp >= *range) { ++range; }
    return p + (*range << 24);
}
// Replaces the strength bits (STRENGTH_MASK) of options.
// value is one of UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY
// or UCOL_IDENTICAL, or UCOL_DEFAULT to take the strength bits from defaultOptions.
// Any other value sets errorCode to U_ILLEGAL_ARGUMENT_ERROR and leaves options alone.
// Does nothing if errorCode already indicates a failure.
void
CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    int32_t strengthBits;
    if(value == UCOL_DEFAULT) {
        strengthBits = defaultOptions & STRENGTH_MASK;
    } else if(value == UCOL_PRIMARY || value == UCOL_SECONDARY || value == UCOL_TERTIARY ||
            value == UCOL_QUATERNARY || value == UCOL_IDENTICAL) {
        strengthBits = value << STRENGTH_SHIFT;
    } else {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    options = (options & ~STRENGTH_MASK) | strengthBits;
}
// Sets, clears or restores the option bit(s) given by bit.
// UCOL_ON sets them, UCOL_OFF clears them, and UCOL_DEFAULT copies them from
// defaultOptions. Any other value sets errorCode to U_ILLEGAL_ARGUMENT_ERROR
// and leaves options alone.
// Does nothing if errorCode already indicates a failure.
void
CollationSettings::setFlag(int32_t bit, UColAttributeValue value,
                           int32_t defaultOptions, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    if(value == UCOL_ON) {
        options |= bit;
    } else if(value == UCOL_OFF) {
        options &= ~bit;
    } else if(value == UCOL_DEFAULT) {
        options = (options & ~bit) | (defaultOptions & bit);
    } else {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    }
}
// Replaces the case-first bits (CASE_FIRST_AND_UPPER_MASK) of options.
// UCOL_OFF clears them, UCOL_LOWER_FIRST stores CASE_FIRST, UCOL_UPPER_FIRST
// stores the whole mask, and UCOL_DEFAULT copies the bits from defaultOptions.
// Any other value sets errorCode to U_ILLEGAL_ARGUMENT_ERROR and leaves options alone.
// Does nothing if errorCode already indicates a failure.
void
CollationSettings::setCaseFirst(UColAttributeValue value,
                                int32_t defaultOptions, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    int32_t caseFirstBits;
    switch(value) {
    case UCOL_OFF:
        caseFirstBits = 0;
        break;
    case UCOL_LOWER_FIRST:
        caseFirstBits = CASE_FIRST;
        break;
    case UCOL_UPPER_FIRST:
        caseFirstBits = CASE_FIRST_AND_UPPER_MASK;
        break;
    case UCOL_DEFAULT:
        caseFirstBits = defaultOptions & CASE_FIRST_AND_UPPER_MASK;
        break;
    default:
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    options = (options & ~CASE_FIRST_AND_UPPER_MASK) | caseFirstBits;
}
// Replaces the alternate-handling bits (ALTERNATE_MASK) of options.
// UCOL_NON_IGNORABLE clears them, UCOL_SHIFTED stores SHIFTED, and UCOL_DEFAULT
// copies the bits from defaultOptions. Any other value sets errorCode to
// U_ILLEGAL_ARGUMENT_ERROR and leaves options alone.
// Does nothing if errorCode already indicates a failure.
void
CollationSettings::setAlternateHandling(UColAttributeValue value,
                                        int32_t defaultOptions, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    int32_t alternateBits;
    switch(value) {
    case UCOL_NON_IGNORABLE:
        alternateBits = 0;
        break;
    case UCOL_SHIFTED:
        alternateBits = SHIFTED;
        break;
    case UCOL_DEFAULT:
        alternateBits = defaultOptions & ALTERNATE_MASK;
        break;
    default:
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    options = (options & ~ALTERNATE_MASK) | alternateBits;
}
// Replaces the max-variable bits (MAX_VARIABLE_MASK) of options.
// value is one of MAX_VAR_SPACE, MAX_VAR_PUNCT, MAX_VAR_SYMBOL or
// MAX_VAR_CURRENCY, or UCOL_DEFAULT to take the bits from defaultOptions.
// Any other value sets errorCode to U_ILLEGAL_ARGUMENT_ERROR and leaves options alone.
// Does nothing if errorCode already indicates a failure.
void
CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    int32_t maxVariableBits;
    if(value == UCOL_DEFAULT) {
        maxVariableBits = defaultOptions & MAX_VARIABLE_MASK;
    } else if(value == MAX_VAR_SPACE || value == MAX_VAR_PUNCT ||
            value == MAX_VAR_SYMBOL || value == MAX_VAR_CURRENCY) {
        maxVariableBits = value << MAX_VARIABLE_SHIFT;
    } else {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    options = (options & ~MAX_VARIABLE_MASK) | maxVariableBits;
}
U_NAMESPACE_END
#endif // !UCONFIG_NO_COLLATION