2018-01-13 22:57:36 +01:00
|
|
|
// © 2016 and later: Unicode, Inc. and others.
|
2018-01-13 22:54:20 +01:00
|
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
|
|
/*
|
|
|
|
******************************************************************************
|
|
|
|
*
|
|
|
|
* Copyright (C) 2001-2014, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*
|
|
|
|
******************************************************************************
|
|
|
|
* file name: utrie2.cpp
|
2018-01-13 22:57:36 +01:00
|
|
|
* encoding: UTF-8
|
2018-01-13 22:54:20 +01:00
|
|
|
* tab size: 8 (not used)
|
|
|
|
* indentation:4
|
|
|
|
*
|
|
|
|
* created on: 2008aug16 (starting from a copy of utrie.c)
|
|
|
|
* created by: Markus W. Scherer
|
|
|
|
*
|
|
|
|
* This is a common implementation of a Unicode trie.
|
|
|
|
* It is a kind of compressed, serializable table of 16- or 32-bit values associated with
|
|
|
|
* Unicode code points (0..0x10ffff).
|
|
|
|
* This is the second common version of a Unicode trie (hence the name UTrie2).
|
|
|
|
* See utrie2.h for a comparison.
|
|
|
|
*
|
|
|
|
* This file contains only the runtime and enumeration code, for read-only access.
|
|
|
|
* See utrie2_builder.c for the builder code.
|
|
|
|
*/
|
|
|
|
#ifdef UTRIE2_DEBUG
|
|
|
|
# include <stdio.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#include "unicode/utf.h"
|
|
|
|
#include "unicode/utf8.h"
|
|
|
|
#include "unicode/utf16.h"
|
|
|
|
#include "cmemory.h"
|
|
|
|
#include "utrie2.h"
|
|
|
|
#include "utrie2_impl.h"
|
|
|
|
#include "uassert.h"
|
|
|
|
|
|
|
|
/* Public UTrie2 API implementation ----------------------------------------- */
|
|
|
|
|
|
|
|
/*
 * Reads the value for c from the build-time (UNewTrie2) data structures.
 *
 * fromLSCP selects how a lead surrogate (U_IS_LEAD(c)) is resolved:
 * - TRUE:  via the special index-2 block at UTRIE2_LSCP_INDEX_2_OFFSET
 *          (this is what utrie2_get32() passes),
 * - FALSE: via the regular index-1/index-2 path
 *          (this is what utrie2_get32FromLeadSurrogateCodeUnit() passes).
 *
 * The caller must ensure that c is in the range the index arrays cover;
 * no range check is done here.
 */
static uint32_t
get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) {
    /* TRUE only for a lead surrogate that is to be resolved via the LSCP block */
    const UBool leadViaLSCP=(UBool)(U_IS_LEAD(c) && fromLSCP);

    /*
     * At or above highStart everything shares the single high value,
     * except a lead surrogate that must use the regular index path.
     */
    if(c>=trie->highStart && (leadViaLSCP || !U_IS_LEAD(c))) {
        return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY];
    }

    int32_t index2Pos;
    if(leadViaLSCP) {
        index2Pos=(c>>UTRIE2_SHIFT_2)+
            (UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2));
    } else {
        index2Pos=((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK)+
            trie->index1[c>>UTRIE2_SHIFT_1];
    }

    const int32_t dataBlock=trie->index2[index2Pos];
    return trie->data[dataBlock+(c&UTRIE2_DATA_MASK)];
}
|
|
|
|
|
|
|
|
/*
 * Returns the value for code point c.
 *
 * If one of the data16/data32 arrays is set, the matching lookup macro is used.
 * Otherwise the value is read from trie->newTrie; in that case a c outside
 * 0..0x10ffff yields trie->errorValue.
 */
U_CAPI uint32_t U_EXPORT2
utrie2_get32(const UTrie2 *trie, UChar32 c) {
    if(trie->data16!=NULL) {
        return UTRIE2_GET16(trie, c);
    }
    if(trie->data32!=NULL) {
        return UTRIE2_GET32(trie, c);
    }
    /* neither data array is set: fall back to the build-time structures */
    if((uint32_t)c<=0x10ffff) {
        return get32(trie->newTrie, c, TRUE);
    }
    return trie->errorValue;
}
|
|
|
|
|
|
|
|
/*
 * Returns the value stored for a lead surrogate code unit.
 * If c is not a lead surrogate (U_IS_LEAD(c) is false), trie->errorValue is returned.
 */
U_CAPI uint32_t U_EXPORT2
utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) {
    if(U_IS_LEAD(c)) {
        if(trie->data16!=NULL) {
            return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c);
        }
        if(trie->data32!=NULL) {
            return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c);
        }
        /* neither data array is set: read from the build-time structures, regular index path */
        return get32(trie->newTrie, c, FALSE);
    }
    return trie->errorValue;
}
|
|
|
|
|
|
|
|
/*
 * Packs a data index and a byte count into one integer:
 * the index computed by _UTRIE2_INDEX_FROM_CP() for c goes into the bits above bit 2,
 * and i (expected to fit into 3 bits) into the low 3 bits.
 *
 * The offset handed to the macro is 0 when the trie has a data32 array,
 * otherwise trie->indexLength.
 */
static inline int32_t
u8Index(const UTrie2 *trie, UChar32 c, int32_t i) {
    const int32_t dataOffset=(trie->data32!=NULL) ? 0 : trie->indexLength;
    const int32_t dataIndex=_UTRIE2_INDEX_FROM_CP(trie, dataOffset, c);
    return (dataIndex<<3)|i;
}
|
|
|
|
|
|
|
|
/*
 * Decodes forward from src (at most 7 bytes, and never beyond limit) with
 * utf8_nextCharSafeBody(), starting from the value c passed in, and returns
 * u8Index() of the resulting code point together with the number of bytes
 * that the decoder advanced.
 */
U_CAPI int32_t U_EXPORT2
utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
                           const uint8_t *src, const uint8_t *limit) {
    /*
     * Compare the pointer difference before narrowing it so that a large
     * difference between 64-bit pointers is never cast to int32_t.
     */
    const int32_t length=((limit-src)>7) ? 7 : (int32_t)(limit-src);
    int32_t consumed=0;
    c=utf8_nextCharSafeBody(src, &consumed, length, c, -1);
    return u8Index(trie, c, consumed);
}
|
|
|
|
|
|
|
|
/*
 * Decodes backward from src (looking at no more than the 7 bytes before src,
 * and never before start) with utf8_prevCharSafeBody(), and returns u8Index()
 * of the resulting code point together with the number of bytes that were
 * read backward from src.
 */
U_CAPI int32_t U_EXPORT2
utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
                           const uint8_t *start, const uint8_t *src) {
    /*
     * Compare the pointer difference before narrowing it so that a large
     * difference between 64-bit pointers is never cast to int32_t.
     */
    int32_t length;
    if((src-start)>7) {
        length=7;
        start=src-7;  /* restrict the window to the last 7 bytes */
    } else {
        length=(int32_t)(src-start);
    }

    int32_t pos=length;  /* offset of src relative to the (possibly adjusted) start */
    c=utf8_prevCharSafeBody(start, 0, &pos, c, -1);
    return u8Index(trie, c, length-pos);
}
|
|
|
|
|
|
|
|
/**
 * Creates a read-only UTrie2 that refers directly to serialized trie data.
 *
 * The data is not copied: the returned trie stores pointers into the caller's
 * buffer (isMemoryOwned=FALSE), so the buffer must stay valid for as long as
 * the trie is used.
 *
 * @param valueBits     Expected value width; must match the value-bits option in the header.
 * @param data          Serialized trie; must not be NULL and must pass the
 *                      U_POINTER_MASK_LSB(data, 3) alignment check.
 * @param length        Number of bytes available at data; must be positive.
 * @param pActualLength If not NULL, receives the number of bytes that the trie occupies.
 * @param pErrorCode    In/out ICU error code. Nothing is done if it already indicates a failure.
 *                      Set to U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *                      U_INVALID_FORMAT_ERROR if the data is not a matching/complete UTrie2,
 *                      U_MEMORY_ALLOCATION_ERROR if the UTrie2 struct cannot be allocated.
 * @return The new trie (release with utrie2_close()), or 0 on failure.
 */
U_CAPI UTrie2 * U_EXPORT2
utrie2_openFromSerialized(UTrie2ValueBits valueBits,
                          const void *data, int32_t length, int32_t *pActualLength,
                          UErrorCode *pErrorCode) {
    const UTrie2Header *header;
    const uint16_t *p16;
    int32_t actualLength;

    UTrie2 tempTrie;
    UTrie2 *trie;

    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* data==NULL would pass the alignment test and then be dereferenced below */
    if( data==NULL || length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) ||
        valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* enough data for a trie header? */
    if(length<(int32_t)sizeof(UTrie2Header)) {
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    /* check the signature */
    header=(const UTrie2Header *)data;
    if(header->signature!=UTRIE2_SIG) {
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    /* get the options */
    if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) {
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    /* get the length values and offsets */
    uprv_memset(&tempTrie, 0, sizeof(tempTrie));
    tempTrie.indexLength=header->indexLength;
    tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT;
    tempTrie.index2NullOffset=header->index2NullOffset;
    tempTrie.dataNullOffset=header->dataNullOffset;

    tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1;
    tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY;
    if(valueBits==UTRIE2_16_VALUE_BITS) {
        tempTrie.highValueIndex+=tempTrie.indexLength;
    }

    /*
     * Reject headers that describe less than the fixed part of a trie.
     * These are the same minimums that utrie2_swap() enforces; without them
     * the fixed-position error value read below could lie outside of the
     * length-checked buffer.
     * Note: dataNullOffset and the index contents are still not validated.
     */
    if( tempTrie.indexLength<UTRIE2_INDEX_1_OFFSET ||
        tempTrie.dataLength<UTRIE2_DATA_START_OFFSET
    ) {
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    /* calculate the actual length */
    actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2;
    if(valueBits==UTRIE2_16_VALUE_BITS) {
        actualLength+=tempTrie.dataLength*2;
    } else {
        actualLength+=tempTrie.dataLength*4;
    }
    if(length<actualLength) {
        *pErrorCode=U_INVALID_FORMAT_ERROR;  /* not enough bytes */
        return 0;
    }

    /* allocate the trie */
    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
    if(trie==NULL) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }
    uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
    trie->memory=(uint32_t *)data;
    trie->length=actualLength;
    trie->isMemoryOwned=FALSE;

    /* set the pointers to its index and data arrays */
    p16=(const uint16_t *)(header+1);
    trie->index=p16;
    p16+=trie->indexLength;

    /* get the data */
    switch(valueBits) {
    case UTRIE2_16_VALUE_BITS:
        trie->data16=p16;
        trie->data32=NULL;
        /* 16-bit data is addressed through the index array (offsets include indexLength) */
        trie->initialValue=trie->index[trie->dataNullOffset];
        trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET];
        break;
    case UTRIE2_32_VALUE_BITS:
        trie->data16=NULL;
        trie->data32=(const uint32_t *)p16;
        trie->initialValue=trie->data32[trie->dataNullOffset];
        trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET];
        break;
    default:
        /* not expected after the valueBits range check; do not leak the struct */
        uprv_free(trie);
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    if(pActualLength!=NULL) {
        *pActualLength=actualLength;
    }
    return trie;
}
|
|
|
|
|
|
|
|
/**
 * Creates a minimal frozen trie whose memory block is allocated and owned by the trie.
 *
 * As built here: every BMP index-2 entry points to the one null data block,
 * highStart is 0, the first 0x80 data entries and the high-value block hold
 * initialValue, and the data entries from 0x80 up to 0xc0 hold errorValue.
 * For 16-bit tries the values are truncated to uint16_t when stored in the data array.
 *
 * @param valueBits    Width of the stored values; must be a valid UTrie2ValueBits value.
 * @param initialValue Value written into the null data block and the high-value block.
 * @param errorValue   Value written into the data entries 0x80..0xbf.
 * @param pErrorCode   In/out ICU error code. Nothing is done if it already indicates a failure.
 *                     Set to U_ILLEGAL_ARGUMENT_ERROR for a bad valueBits,
 *                     U_MEMORY_ALLOCATION_ERROR if an allocation fails.
 * @return The new trie (release with utrie2_close()), or 0 on failure.
 */
U_CAPI UTrie2 * U_EXPORT2
utrie2_openDummy(UTrie2ValueBits valueBits,
                 uint32_t initialValue, uint32_t errorValue,
                 UErrorCode *pErrorCode) {
    UTrie2 *trie;
    UTrie2Header *header;
    uint32_t *p;
    uint16_t *dest16;
    int32_t indexLength, dataLength, length, i;
    int32_t dataMove;  /* >0 if the data is moved to the end of the index array */

    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* calculate the total length of the dummy trie data */
    indexLength=UTRIE2_INDEX_1_OFFSET;
    dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY;
    length=(int32_t)sizeof(UTrie2Header)+indexLength*2;
    if(valueBits==UTRIE2_16_VALUE_BITS) {
        length+=dataLength*2;
    } else {
        length+=dataLength*4;
    }

    /* allocate the trie */
    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
    if(trie==NULL) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }
    uprv_memset(trie, 0, sizeof(UTrie2));
    trie->memory=uprv_malloc(length);
    if(trie->memory==NULL) {
        uprv_free(trie);
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }
    trie->length=length;
    trie->isMemoryOwned=TRUE;

    /* set the UTrie2 fields */
    if(valueBits==UTRIE2_16_VALUE_BITS) {
        dataMove=indexLength;
    } else {
        dataMove=0;
    }

    trie->indexLength=indexLength;
    trie->dataLength=dataLength;
    trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET;
    trie->dataNullOffset=(uint16_t)dataMove;
    trie->initialValue=initialValue;
    trie->errorValue=errorValue;
    trie->highStart=0;
    trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET;

    /* set the header fields */
    header=(UTrie2Header *)trie->memory;

    header->signature=UTRIE2_SIG;  /* "Tri2" */
    header->options=(uint16_t)valueBits;

    header->indexLength=(uint16_t)indexLength;
    header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT);
    header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET;
    header->dataNullOffset=(uint16_t)dataMove;
    header->shiftedHighStart=0;

    /* fill the index and data arrays */
    dest16=(uint16_t *)(header+1);
    trie->index=dest16;

    /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */
    for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
        *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT);  /* null data block */
    }

    /* write UTF-8 2-byte index-2 values, not right-shifted */
    for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */
        *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
    }
    for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */
        *dest16++=(uint16_t)dataMove;
    }

    /* write the 16/32-bit data array */
    switch(valueBits) {
    case UTRIE2_16_VALUE_BITS:
        /* write 16-bit data values */
        trie->data16=dest16;
        trie->data32=NULL;
        for(i=0; i<0x80; ++i) {
            *dest16++=(uint16_t)initialValue;
        }
        for(; i<0xc0; ++i) {
            *dest16++=(uint16_t)errorValue;
        }
        /* highValue and reserved values */
        for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
            *dest16++=(uint16_t)initialValue;
        }
        break;
    case UTRIE2_32_VALUE_BITS:
        /* write 32-bit data values */
        p=(uint32_t *)dest16;
        trie->data16=NULL;
        trie->data32=p;
        for(i=0; i<0x80; ++i) {
            *p++=initialValue;
        }
        for(; i<0xc0; ++i) {
            *p++=errorValue;
        }
        /* highValue and reserved values */
        for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
            *p++=initialValue;
        }
        break;
    default:
        /* not expected after the valueBits range check; release both allocations */
        uprv_free(trie->memory);
        uprv_free(trie);
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    return trie;
}
|
|
|
|
|
|
|
|
/*
 * Releases a trie. NULL is accepted and ignored.
 * Frees the serialized memory block only if the trie owns it, then the
 * build-time structure (if any) together with its data array, then the
 * UTrie2 struct itself.
 */
U_CAPI void U_EXPORT2
utrie2_close(UTrie2 *trie) {
    if(trie==NULL) {
        return;
    }

    if(trie->isMemoryOwned) {
        uprv_free(trie->memory);
    }

    UNewTrie2 *builder=trie->newTrie;
    if(builder!=NULL) {
        uprv_free(builder->data);
        uprv_free(builder);
    }

    uprv_free(trie);
}
|
|
|
|
|
|
|
|
/*
 * Determines the trie version from the 32-bit signature at the start of data.
 *
 * Returns 2 for UTRIE2_SIG, 1 for UTRIE_SIG, and, only if anyEndianOk, also
 * 2 for UTRIE2_OE_SIG and 1 for UTRIE_OE_SIG.
 * Returns 0 if data is NULL, fails the U_POINTER_MASK_LSB(data, 3) alignment
 * check, length is less than 16, or no signature matches.
 */
U_CAPI int32_t U_EXPORT2
utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) {
    if(data==NULL || length<16 || (U_POINTER_MASK_LSB(data, 3)!=0)) {
        return 0;
    }

    const uint32_t signature=*(const uint32_t *)data;
    if(signature==UTRIE2_SIG || (anyEndianOk && signature==UTRIE2_OE_SIG)) {
        return 2;
    }
    if(signature==UTRIE_SIG || (anyEndianOk && signature==UTRIE_OE_SIG)) {
        return 1;
    }
    return 0;
}
|
|
|
|
|
|
|
|
/* A trie counts as frozen exactly when it has no build-time structure (newTrie is NULL). */
U_CAPI UBool U_EXPORT2
utrie2_isFrozen(const UTrie2 *trie) {
    if(trie->newTrie!=NULL) {
        return FALSE;
    }
    return TRUE;
}
|
|
|
|
|
|
|
|
/*
 * Copies the trie's serialized memory block into data.
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 if the trie is NULL, has no
 * memory block, still has a build-time structure (newTrie), if capacity is
 * negative, or if capacity is positive while data is NULL or fails the
 * U_POINTER_MASK_LSB(data, 3) alignment check.
 *
 * Otherwise returns trie->length; if capacity is smaller than that, nothing
 * is copied and U_BUFFER_OVERFLOW_ERROR is set.
 */
U_CAPI int32_t U_EXPORT2
utrie2_serialize(const UTrie2 *trie,
                 void *data, int32_t capacity,
                 UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* the trie must be usable and have a serialized form */
    if(trie==NULL || trie->memory==NULL || trie->newTrie!=NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    /* the destination is only inspected when there is room to write into */
    if(capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)))) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const int32_t needed=trie->length;
    if(capacity<needed) {
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    } else {
        uprv_memcpy(data, trie->memory, needed);
    }
    return needed;
}
|
|
|
|
|
|
|
|
/*
 * Swaps a serialized UTrie2 between the input and output conventions of ds.
 *
 * With length<0 only the header is read and the total size in bytes is
 * returned; nothing is written and outData may be NULL.
 * With length>=0 the header, the index array and the data array are swapped
 * from inData to outData and the total size is returned.
 *
 * Errors (return value 0):
 * - U_ILLEGAL_ARGUMENT_ERROR: ds or inData is NULL, or outData is NULL while length>=0
 * - U_INDEX_OUTOFBOUNDS_ERROR: length>=0 but smaller than the header or the whole trie
 * - U_INVALID_FORMAT_ERROR: wrong signature, bad value bits, or index/data lengths
 *   below the minimums UTRIE2_INDEX_1_OFFSET / UTRIE2_DATA_START_OFFSET
 */
U_CAPI int32_t U_EXPORT2
utrie2_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* setup and swapping */
    if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* read the header fields that determine the layout, in platform byte order */
    const UTrie2Header *inTrie=(const UTrie2Header *)inData;
    UTrie2Header header;
    header.signature=ds->readUInt32(inTrie->signature);
    header.options=ds->readUInt16(inTrie->options);
    header.indexLength=ds->readUInt16(inTrie->indexLength);
    header.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);

    const UTrie2ValueBits valueBits=
        (UTrie2ValueBits)(header.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
    const int32_t dataLength=(int32_t)header.shiftedDataLength<<UTRIE2_INDEX_SHIFT;

    if( header.signature!=UTRIE2_SIG ||
        valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
        header.indexLength<UTRIE2_INDEX_1_OFFSET ||
        dataLength<UTRIE2_DATA_START_OFFSET
    ) {
        *pErrorCode=U_INVALID_FORMAT_ERROR;  /* not a UTrie */
        return 0;
    }

    /* total size: header + 16-bit index + 16- or 32-bit data */
    int32_t bytesPerValue;
    switch(valueBits) {
    case UTRIE2_16_VALUE_BITS:
        bytesPerValue=2;
        break;
    case UTRIE2_32_VALUE_BITS:
        bytesPerValue=4;
        break;
    default:
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }
    const int32_t size=
        (int32_t)sizeof(UTrie2Header)+header.indexLength*2+dataLength*bytesPerValue;

    if(length<0) {
        /* size query only */
        return size;
    }
    if(length<size) {
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    UTrie2Header *outTrie=(UTrie2Header *)outData;

    /* swap the header: the 32-bit signature, then the 12 bytes of 16-bit fields */
    ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
    ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);

    /* swap the index and the data; valueBits is one of the two known widths here */
    if(valueBits==UTRIE2_16_VALUE_BITS) {
        /* index and data are both 16-bit and adjacent: swap them in one go */
        ds->swapArray16(ds, inTrie+1, (header.indexLength+dataLength)*2, outTrie+1, pErrorCode);
    } else {
        ds->swapArray16(ds, inTrie+1, header.indexLength*2, outTrie+1, pErrorCode);
        ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+header.indexLength, dataLength*4,
                            (uint16_t *)(outTrie+1)+header.indexLength, pErrorCode);
    }

    return size;
}
|
|
|
|
|
|
|
|
// utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c
|
|
|
|
// to avoid a dependency from utrie2.cpp on utrie.c.
|
|
|
|
|
|
|
|
/* enumeration -------------------------------------------------------------- */
|
|
|
|
|
|
|
|
/* Smaller of a and b. Function-like macro: the selected argument is evaluated twice, so pass side-effect-free expressions. */
#define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b))
|
|
|
|
|
|
|
|
/* default UTrie2EnumValue() returns the input value itself */
|
|
|
|
static uint32_t U_CALLCONV
|
|
|
|
enumSameValue(const void * /*context*/, uint32_t value) {
|
|
|
|
return value;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Enumerate all ranges of code points with the same relevant values.
|
|
|
|
* The values are transformed from the raw trie entries by the enumValue function.
|
|
|
|
*
|
|
|
|
* Currently requires start<limit and both start and limit must be multiples
|
|
|
|
* of UTRIE2_DATA_BLOCK_LENGTH.
|
|
|
|
*
|
|
|
|
* Optimizations:
|
|
|
|
* - Skip a whole block if we know that it is filled with a single value,
|
|
|
|
* and it is the same as we visited just before.
|
|
|
|
* - Handle the null block specially because we know a priori that it is filled
|
|
|
|
* with a single value.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
enumEitherTrie(const UTrie2 *trie,
|
|
|
|
UChar32 start, UChar32 limit,
|
|
|
|
UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
|
|
|
|
const uint32_t *data32;
|
|
|
|
const uint16_t *idx;
|
|
|
|
|
|
|
|
uint32_t value, prevValue, initialValue;
|
|
|
|
UChar32 c, prev, highStart;
|
|
|
|
int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
|
|
|
|
|
|
|
|
if(enumRange==NULL) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if(enumValue==NULL) {
|
|
|
|
enumValue=enumSameValue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(trie->newTrie==NULL) {
|
|
|
|
/* frozen trie */
|
|
|
|
idx=trie->index;
|
|
|
|
U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */
|
|
|
|
data32=trie->data32;
|
|
|
|
|
|
|
|
index2NullOffset=trie->index2NullOffset;
|
|
|
|
nullBlock=trie->dataNullOffset;
|
|
|
|
} else {
|
|
|
|
/* unfrozen, mutable trie */
|
|
|
|
idx=NULL;
|
|
|
|
data32=trie->newTrie->data;
|
|
|
|
U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */
|
|
|
|
|
|
|
|
index2NullOffset=trie->newTrie->index2NullOffset;
|
|
|
|
nullBlock=trie->newTrie->dataNullOffset;
|
|
|
|
}
|
|
|
|
|
|
|
|
highStart=trie->highStart;
|
|
|
|
|
|
|
|
/* get the enumeration value that corresponds to an initial-value trie data entry */
|
|
|
|
initialValue=enumValue(context, trie->initialValue);
|
|
|
|
|
|
|
|
/* set variables for previous range */
|
|
|
|
prevI2Block=-1;
|
|
|
|
prevBlock=-1;
|
|
|
|
prev=start;
|
|
|
|
prevValue=0;
|
|
|
|
|
|
|
|
/* enumerate index-2 blocks */
|
|
|
|
for(c=start; c<limit && c<highStart;) {
|
|
|
|
/* Code point limit for iterating inside this i2Block. */
|
|
|
|
UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY;
|
|
|
|
if(limit<tempLimit) {
|
|
|
|
tempLimit=limit;
|
|
|
|
}
|
|
|
|
if(c<=0xffff) {
|
|
|
|
if(!U_IS_SURROGATE(c)) {
|
|
|
|
i2Block=c>>UTRIE2_SHIFT_2;
|
|
|
|
} else if(U_IS_SURROGATE_LEAD(c)) {
|
|
|
|
/*
|
|
|
|
* Enumerate values for lead surrogate code points, not code units:
|
|
|
|
* This special block has half the normal length.
|
|
|
|
*/
|
|
|
|
i2Block=UTRIE2_LSCP_INDEX_2_OFFSET;
|
|
|
|
tempLimit=MIN_VALUE(0xdc00, limit);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Switch back to the normal part of the index-2 table.
|
|
|
|
* Enumerate the second half of the surrogates block.
|
|
|
|
*/
|
|
|
|
i2Block=0xd800>>UTRIE2_SHIFT_2;
|
|
|
|
tempLimit=MIN_VALUE(0xe000, limit);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* supplementary code points */
|
|
|
|
if(idx!=NULL) {
|
|
|
|
i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+
|
|
|
|
(c>>UTRIE2_SHIFT_1)];
|
|
|
|
} else {
|
|
|
|
i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1];
|
|
|
|
}
|
|
|
|
if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) {
|
|
|
|
/*
|
|
|
|
* The index-2 block is the same as the previous one, and filled with prevValue.
|
|
|
|
* Only possible for supplementary code points because the linear-BMP index-2
|
|
|
|
* table creates unique i2Block values.
|
|
|
|
*/
|
|
|
|
c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
prevI2Block=i2Block;
|
|
|
|
if(i2Block==index2NullOffset) {
|
|
|
|
/* this is the null index-2 block */
|
|
|
|
if(prevValue!=initialValue) {
|
|
|
|
if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
prevBlock=nullBlock;
|
|
|
|
prev=c;
|
|
|
|
prevValue=initialValue;
|
|
|
|
}
|
|
|
|
c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
|
|
|
|
} else {
|
|
|
|
/* enumerate data blocks for one index-2 block */
|
|
|
|
int32_t i2, i2Limit;
|
|
|
|
i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
|
|
|
|
if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) {
|
|
|
|
i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
|
|
|
|
} else {
|
|
|
|
i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH;
|
|
|
|
}
|
|
|
|
for(; i2<i2Limit; ++i2) {
|
|
|
|
if(idx!=NULL) {
|
|
|
|
block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT;
|
|
|
|
} else {
|
|
|
|
block=trie->newTrie->index2[i2Block+i2];
|
|
|
|
}
|
|
|
|
if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) {
|
|
|
|
/* the block is the same as the previous one, and filled with prevValue */
|
|
|
|
c+=UTRIE2_DATA_BLOCK_LENGTH;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
prevBlock=block;
|
|
|
|
if(block==nullBlock) {
|
|
|
|
/* this is the null data block */
|
|
|
|
if(prevValue!=initialValue) {
|
|
|
|
if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
prev=c;
|
|
|
|
prevValue=initialValue;
|
|
|
|
}
|
|
|
|
c+=UTRIE2_DATA_BLOCK_LENGTH;
|
|
|
|
} else {
|
|
|
|
for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) {
|
|
|
|
value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
|
|
|
|
if(value!=prevValue) {
|
|
|
|
if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
prev=c;
|
|
|
|
prevValue=value;
|
|
|
|
}
|
|
|
|
++c;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if(c>limit) {
|
|
|
|
c=limit; /* could be higher if in the index2NullOffset */
|
|
|
|
} else if(c<limit) {
|
|
|
|
/* c==highStart<limit */
|
|
|
|
uint32_t highValue;
|
|
|
|
if(idx!=NULL) {
|
|
|
|
highValue=
|
|
|
|
data32!=NULL ?
|
|
|
|
data32[trie->highValueIndex] :
|
|
|
|
idx[trie->highValueIndex];
|
|
|
|
} else {
|
|
|
|
highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY];
|
|
|
|
}
|
|
|
|
value=enumValue(context, highValue);
|
|
|
|
if(value!=prevValue) {
|
|
|
|
if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
prev=c;
|
|
|
|
prevValue=value;
|
|
|
|
}
|
|
|
|
c=limit;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* deliver last range */
|
|
|
|
enumRange(context, prev, c-1, prevValue);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Enumerates the value ranges for all code points 0..0x10ffff via enumEitherTrie(). */
U_CAPI void U_EXPORT2
utrie2_enum(const UTrie2 *trie,
            UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
    const UChar32 firstCodePoint=0;
    const UChar32 codePointLimit=0x110000;  /* exclusive */
    enumEitherTrie(trie, firstCodePoint, codePointLimit, enumValue, enumRange, context);
}
|
|
|
|
|
|
|
|
/*
 * Enumerates the value ranges for the 0x400 supplementary code points that
 * start at (lead-0xd7c0)<<10. Does nothing if lead is not a lead surrogate
 * according to U16_IS_LEAD().
 */
U_CAPI void U_EXPORT2
utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
                            UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
                            const void *context) {
    if(U16_IS_LEAD(lead)) {
        const UChar32 first=(lead-0xd7c0)<<10;  /* start code point */
        enumEitherTrie(trie, first, first+0x400, enumValue, enumRange, context);
    }
}
|
|
|
|
|
|
|
|
/* C++ convenience wrappers ------------------------------------------------- */
|
|
|
|
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
|
|
|
/*
 * Moves one code point backward and returns its 16-bit trie value.
 * codePointLimit is first set to the old codePointStart. If the iterator is
 * already at (or before) start, codePoint becomes U_SENTINEL and the trie's
 * errorValue is returned.
 */
uint16_t BackwardUTrie2StringIterator::previous16() {
    codePointLimit=codePointStart;
    if(start<codePointStart) {
        uint16_t trieValue;
        UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, trieValue);
        return trieValue;
    }
    /* nothing left to read */
    codePoint=U_SENTINEL;
    return trie->errorValue;
}
|
|
|
|
|
|
|
|
/*
 * Moves one code point forward and returns its 16-bit trie value.
 * codePointStart is first set to the old codePointLimit. If the iterator is
 * already at limit, codePoint becomes U_SENTINEL and the trie's errorValue
 * is returned.
 */
uint16_t ForwardUTrie2StringIterator::next16() {
    codePointStart=codePointLimit;
    if(codePointLimit!=limit) {
        uint16_t trieValue;
        UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, trieValue);
        return trieValue;
    }
    /* nothing left to read */
    codePoint=U_SENTINEL;
    return trie->errorValue;
}
|
|
|
|
|
|
|
|
U_NAMESPACE_END
|