578 lines
18 KiB
C++
578 lines
18 KiB
C++
// © 2016 and later: Unicode, Inc. and others.
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
/*
|
|
*******************************************************************************
|
|
* Copyright (C) 1997-2013, International Business Machines Corporation and *
|
|
* others. All Rights Reserved. *
|
|
*******************************************************************************
|
|
*
|
|
* File CHOICFMT.CPP
|
|
*
|
|
* Modification History:
|
|
*
|
|
* Date Name Description
|
|
* 02/19/97 aliu Converted from java.
|
|
* 03/20/97 helena Finished first cut of implementation and got rid
|
|
* of nextDouble/previousDouble and replaced with
|
|
* boolean array.
|
|
* 4/10/97 aliu Clean up. Modified to work on AIX.
|
|
* 06/04/97 helena Fixed applyPattern(), toPattern() and not to include
|
|
* wchar.h.
|
|
* 07/09/97 helena Made ParsePosition into a class.
|
|
* 08/06/97 nos removed overloaded constructor, fixed 'format(array)'
|
|
* 07/22/98 stephen JDK 1.2 Sync - removed UBool array (doubleFlags)
|
|
* 02/22/99 stephen Removed character literals for EBCDIC safety
|
|
********************************************************************************
|
|
*/
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#if !UCONFIG_NO_FORMATTING
|
|
|
|
#include "unicode/choicfmt.h"
|
|
#include "unicode/numfmt.h"
|
|
#include "unicode/locid.h"
|
|
#include "cpputils.h"
|
|
#include "cstring.h"
|
|
#include "messageimpl.h"
|
|
#include "putilimp.h"
|
|
#include "uassert.h"
|
|
#include <stdio.h>
|
|
#include <float.h>
|
|
|
|
// *****************************************************************************
|
|
// class ChoiceFormat
|
|
// *****************************************************************************
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
|
|
|
|
// Special characters used by ChoiceFormat. There are two characters
|
|
// used interchangeably to indicate <=. Either is parsed, but only
|
|
// LESS_EQUAL is generated by toPattern().
|
|
#define SINGLE_QUOTE ((UChar)0x0027) /*'*/
|
|
#define LESS_THAN ((UChar)0x003C) /*<*/
|
|
#define LESS_EQUAL ((UChar)0x0023) /*#*/
|
|
#define LESS_EQUAL2 ((UChar)0x2264)
|
|
#define VERTICAL_BAR ((UChar)0x007C) /*|*/
|
|
#define MINUS ((UChar)0x002D) /*-*/
|
|
|
|
static const UChar LEFT_CURLY_BRACE = 0x7B; /*{*/
|
|
static const UChar RIGHT_CURLY_BRACE = 0x7D; /*}*/
|
|
|
|
#ifdef INFINITY
|
|
#undef INFINITY
|
|
#endif
|
|
#define INFINITY ((UChar)0x221E)
|
|
|
|
//static const UChar gPositiveInfinity[] = {INFINITY, 0};
|
|
//static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
|
|
#define POSITIVE_INF_STRLEN 1
|
|
#define NEGATIVE_INF_STRLEN 2
|
|
|
|
// -------------------------------------
|
|
// Creates a ChoiceFormat instance based on the pattern.
|
|
|
|
ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
|
|
UErrorCode& status)
|
|
: constructorErrorCode(status),
|
|
msgPattern(status)
|
|
{
|
|
applyPattern(newPattern, status);
|
|
}
|
|
|
|
// -------------------------------------
|
|
// Creates a ChoiceFormat instance with the limit array and
|
|
// format strings for each limit.
|
|
|
|
ChoiceFormat::ChoiceFormat(const double* limits,
|
|
const UnicodeString* formats,
|
|
int32_t cnt )
|
|
: constructorErrorCode(U_ZERO_ERROR),
|
|
msgPattern(constructorErrorCode)
|
|
{
|
|
setChoices(limits, NULL, formats, cnt, constructorErrorCode);
|
|
}
|
|
|
|
// -------------------------------------
|
|
|
|
// Constructs a ChoiceFormat from parallel arrays of limits, closure flags
// and format strings.  All arrays are forwarded to setChoices(); a failure
// is recorded in constructorErrorCode because there is no status parameter.
ChoiceFormat::ChoiceFormat(const double* limits,
                           const UBool* closures,
                           const UnicodeString* formats,
                           int32_t cnt )
    : constructorErrorCode(U_ZERO_ERROR),
      msgPattern(constructorErrorCode)
{
    setChoices(limits, closures, formats, cnt, constructorErrorCode);
}
|
|
|
|
// -------------------------------------
|
|
// copy constructor
|
|
|
|
ChoiceFormat::ChoiceFormat(const ChoiceFormat& that)
|
|
: NumberFormat(that),
|
|
constructorErrorCode(that.constructorErrorCode),
|
|
msgPattern(that.msgPattern)
|
|
{
|
|
}
|
|
|
|
// -------------------------------------
|
|
// Private constructor that creates a
|
|
// ChoiceFormat instance based on the
|
|
// pattern and populates UParseError
|
|
|
|
ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
|
|
UParseError& parseError,
|
|
UErrorCode& status)
|
|
: constructorErrorCode(status),
|
|
msgPattern(status)
|
|
{
|
|
applyPattern(newPattern,parseError, status);
|
|
}
|
|
// -------------------------------------
|
|
|
|
// Equality: TRUE for the same object, FALSE when the NumberFormat base
// comparison fails, otherwise the result of comparing the two message
// patterns.
UBool
ChoiceFormat::operator==(const Format& that) const
{
    if (this != &that) {
        if (!NumberFormat::operator==(that)) {
            return FALSE;
        }
        // NOTE(review): the downcast is only reached after the base-class
        // comparison succeeded; presumably that comparison guarantees the
        // dynamic type matches - confirm in NumberFormat::operator==.
        const ChoiceFormat& other = static_cast<const ChoiceFormat&>(that);
        return msgPattern == other.msgPattern;
    }
    return TRUE;
}
|
|
|
|
// -------------------------------------
|
|
// copy constructor
|
|
|
|
const ChoiceFormat&
|
|
ChoiceFormat::operator=(const ChoiceFormat& that)
|
|
{
|
|
if (this != &that) {
|
|
NumberFormat::operator=(that);
|
|
constructorErrorCode = that.constructorErrorCode;
|
|
msgPattern = that.msgPattern;
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
// -------------------------------------
|
|
|
|
// Destructor: performs no explicit cleanup of its own.
ChoiceFormat::~ChoiceFormat() {}
|
|
|
|
// -------------------------------------
|
|
|
|
/**
|
|
* Convert a double value to a string without the overhead of NumberFormat.
|
|
*/
|
|
UnicodeString&
|
|
ChoiceFormat::dtos(double value,
|
|
UnicodeString& string)
|
|
{
|
|
/* Buffer to contain the digits and any extra formatting stuff. */
|
|
char temp[DBL_DIG + 16];
|
|
char *itrPtr = temp;
|
|
char *expPtr;
|
|
|
|
sprintf(temp, "%.*g", DBL_DIG, value);
|
|
|
|
/* Find and convert the decimal point.
|
|
Using setlocale on some machines will cause sprintf to use a comma for certain locales.
|
|
*/
|
|
while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
|
|
itrPtr++;
|
|
}
|
|
if (*itrPtr != 0 && *itrPtr != 'e') {
|
|
/* We reached something that looks like a decimal point.
|
|
In case someone used setlocale(), which changes the decimal point. */
|
|
*itrPtr = '.';
|
|
itrPtr++;
|
|
}
|
|
/* Search for the exponent */
|
|
while (*itrPtr && *itrPtr != 'e') {
|
|
itrPtr++;
|
|
}
|
|
if (*itrPtr == 'e') {
|
|
itrPtr++;
|
|
/* Verify the exponent sign */
|
|
if (*itrPtr == '+' || *itrPtr == '-') {
|
|
itrPtr++;
|
|
}
|
|
/* Remove leading zeros. You will see this on Windows machines. */
|
|
expPtr = itrPtr;
|
|
while (*itrPtr == '0') {
|
|
itrPtr++;
|
|
}
|
|
if (*itrPtr && expPtr != itrPtr) {
|
|
/* Shift the exponent without zeros. */
|
|
while (*itrPtr) {
|
|
*(expPtr++) = *(itrPtr++);
|
|
}
|
|
// NULL terminate
|
|
*expPtr = 0;
|
|
}
|
|
}
|
|
|
|
string = UnicodeString(temp, -1, US_INV); /* invariant codepage */
|
|
return string;
|
|
}
|
|
|
|
// -------------------------------------
|
|
// calls the overloaded applyPattern method.
|
|
|
|
void
|
|
ChoiceFormat::applyPattern(const UnicodeString& pattern,
|
|
UErrorCode& status)
|
|
{
|
|
msgPattern.parseChoiceStyle(pattern, NULL, status);
|
|
constructorErrorCode = status;
|
|
}
|
|
|
|
// -------------------------------------
|
|
// Applies the pattern to this ChoiceFormat instance.
|
|
|
|
void
|
|
ChoiceFormat::applyPattern(const UnicodeString& pattern,
|
|
UParseError& parseError,
|
|
UErrorCode& status)
|
|
{
|
|
msgPattern.parseChoiceStyle(pattern, &parseError, status);
|
|
constructorErrorCode = status;
|
|
}
|
|
// -------------------------------------
|
|
// Returns the input pattern string.
|
|
|
|
UnicodeString&
|
|
ChoiceFormat::toPattern(UnicodeString& result) const
|
|
{
|
|
return result = msgPattern.getPatternString();
|
|
}
|
|
|
|
// -------------------------------------
|
|
// Sets the limit and format arrays.
|
|
void
|
|
ChoiceFormat::setChoices( const double* limits,
|
|
const UnicodeString* formats,
|
|
int32_t cnt )
|
|
{
|
|
UErrorCode errorCode = U_ZERO_ERROR;
|
|
setChoices(limits, NULL, formats, cnt, errorCode);
|
|
}
|
|
|
|
// -------------------------------------
|
|
// Sets the limit and format arrays.
|
|
void
|
|
ChoiceFormat::setChoices( const double* limits,
|
|
const UBool* closures,
|
|
const UnicodeString* formats,
|
|
int32_t cnt )
|
|
{
|
|
UErrorCode errorCode = U_ZERO_ERROR;
|
|
setChoices(limits, closures, formats, cnt, errorCode);
|
|
}
|
|
|
|
void
|
|
ChoiceFormat::setChoices(const double* limits,
|
|
const UBool* closures,
|
|
const UnicodeString* formats,
|
|
int32_t count,
|
|
UErrorCode &errorCode) {
|
|
if (U_FAILURE(errorCode)) {
|
|
return;
|
|
}
|
|
if (limits == NULL || formats == NULL) {
|
|
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
// Reconstruct the original input pattern.
|
|
// Modified version of the pre-ICU 4.8 toPattern() implementation.
|
|
UnicodeString result;
|
|
for (int32_t i = 0; i < count; ++i) {
|
|
if (i != 0) {
|
|
result += VERTICAL_BAR;
|
|
}
|
|
UnicodeString buf;
|
|
if (uprv_isPositiveInfinity(limits[i])) {
|
|
result += INFINITY;
|
|
} else if (uprv_isNegativeInfinity(limits[i])) {
|
|
result += MINUS;
|
|
result += INFINITY;
|
|
} else {
|
|
result += dtos(limits[i], buf);
|
|
}
|
|
if (closures != NULL && closures[i]) {
|
|
result += LESS_THAN;
|
|
} else {
|
|
result += LESS_EQUAL;
|
|
}
|
|
// Append formats[i], using quotes if there are special
|
|
// characters. Single quotes themselves must be escaped in
|
|
// either case.
|
|
const UnicodeString& text = formats[i];
|
|
int32_t textLength = text.length();
|
|
int32_t nestingLevel = 0;
|
|
for (int32_t j = 0; j < textLength; ++j) {
|
|
UChar c = text[j];
|
|
if (c == SINGLE_QUOTE && nestingLevel == 0) {
|
|
// Double each top-level apostrophe.
|
|
result.append(c);
|
|
} else if (c == VERTICAL_BAR && nestingLevel == 0) {
|
|
// Surround each pipe symbol with apostrophes for quoting.
|
|
// If the next character is an apostrophe, then that will be doubled,
|
|
// and although the parser will see the apostrophe pairs beginning
|
|
// and ending one character earlier than our doubling, the result
|
|
// is as desired.
|
|
// | -> '|'
|
|
// |' -> '|'''
|
|
// |'' -> '|''''' etc.
|
|
result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
|
|
continue; // Skip the append(c) at the end of the loop body.
|
|
} else if (c == LEFT_CURLY_BRACE) {
|
|
++nestingLevel;
|
|
} else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
|
|
--nestingLevel;
|
|
}
|
|
result.append(c);
|
|
}
|
|
}
|
|
// Apply the reconstructed pattern.
|
|
applyPattern(result, errorCode);
|
|
}
|
|
|
|
// -------------------------------------
|
|
// Gets the limit array.
|
|
|
|
const double*
|
|
ChoiceFormat::getLimits(int32_t& cnt) const
|
|
{
|
|
cnt = 0;
|
|
return NULL;
|
|
}
|
|
|
|
// -------------------------------------
|
|
// Gets the closures array.
|
|
|
|
const UBool*
|
|
ChoiceFormat::getClosures(int32_t& cnt) const
|
|
{
|
|
cnt = 0;
|
|
return NULL;
|
|
}
|
|
|
|
// -------------------------------------
|
|
// Gets the format array.
|
|
|
|
const UnicodeString*
|
|
ChoiceFormat::getFormats(int32_t& cnt) const
|
|
{
|
|
cnt = 0;
|
|
return NULL;
|
|
}
|
|
|
|
// -------------------------------------
|
|
// Formats an int64 number, it's actually formatted as
|
|
// a double. The returned format string may differ
|
|
// from the input number because of this.
|
|
|
|
UnicodeString&
|
|
ChoiceFormat::format(int64_t number,
|
|
UnicodeString& appendTo,
|
|
FieldPosition& status) const
|
|
{
|
|
return format((double) number, appendTo, status);
|
|
}
|
|
|
|
// -------------------------------------
|
|
// Formats an int32_t number, it's actually formatted as
|
|
// a double.
|
|
|
|
UnicodeString&
|
|
ChoiceFormat::format(int32_t number,
|
|
UnicodeString& appendTo,
|
|
FieldPosition& status) const
|
|
{
|
|
return format((double) number, appendTo, status);
|
|
}
|
|
|
|
// -------------------------------------
|
|
// Formats a double number.
|
|
|
|
UnicodeString&
|
|
ChoiceFormat::format(double number,
|
|
UnicodeString& appendTo,
|
|
FieldPosition& /*pos*/) const
|
|
{
|
|
if (msgPattern.countParts() == 0) {
|
|
// No pattern was applied, or it failed.
|
|
return appendTo;
|
|
}
|
|
// Get the appropriate sub-message.
|
|
int32_t msgStart = findSubMessage(msgPattern, 0, number);
|
|
if (!MessageImpl::jdkAposMode(msgPattern)) {
|
|
int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
|
|
int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
|
|
appendTo.append(msgPattern.getPatternString(),
|
|
patternStart,
|
|
msgPattern.getPatternIndex(msgLimit) - patternStart);
|
|
return appendTo;
|
|
}
|
|
// JDK compatibility mode: Remove SKIP_SYNTAX.
|
|
return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
|
|
}
|
|
|
|
// Finds the sub-message of a choice pattern that applies to number.
//
// pattern   - the parsed pattern
// partIndex - index of the first numeric part of the choice style
// number    - the value to classify
// Returns the part index at which the selected sub-message starts.
//
// The parts form (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples that run
// until ARG_LIMIT or the end of a choice-only pattern.  The first number
// and selector are ignored; each later boundary decides whether the
// message before it is the answer.
int32_t
ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
    const int32_t partCount = pattern.countParts();
    // Skip the first number and selector: start on the first message.
    int32_t candidate = partIndex + 2;
    for (;;) {
        // The part after this message's limit part is the next boundary
        // number, if there is one.
        const int32_t numberIndex = pattern.getLimitPartIndex(candidate) + 1;
        if (numberIndex >= partCount) {
            // Reached the end of the choice-only pattern.
            // Return with the last sub-message.
            return candidate;
        }
        const MessagePattern::Part &numberPart = pattern.getPart(numberIndex);
        const UMessagePatternPartType type = numberPart.getType();
        if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
            // Reached the end of the ChoiceFormat style.
            // Return with the last sub-message.
            return candidate;
        }
        // numberPart is an ARG_INT or ARG_DOUBLE
        U_ASSERT(MessagePattern::Part::hasNumericValue(type));
        const double boundary = pattern.getNumericValue(numberPart);

        // The ARG_SELECTOR part follows the number; fetch its character.
        const int32_t selectorPart = numberIndex + 1;
        const UChar selector =
            pattern.getPatternString().charAt(pattern.getPatternIndex(selectorPart));

        // The !(a>b) and !(a>=b) comparisons are equivalent to
        // (a<=b) and (a<b) except they "catch" NaN.
        UBool belowBoundary;
        if (selector == LESS_THAN) {
            belowBoundary = !(number > boundary);
        } else {
            belowBoundary = !(number >= boundary);
        }
        if (belowBoundary) {
            // The number is in the interval between the previous boundary
            // and the current one: the message between them is the answer.
            return candidate;
        }

        // Move on to the message that follows the selector.
        candidate = selectorPart + 1;
    }
}
|
|
|
|
// -------------------------------------
|
|
// Formats an array of objects. Checks if the data type of the objects
|
|
// to get the right value for formatting.
|
|
|
|
UnicodeString&
|
|
ChoiceFormat::format(const Formattable* objs,
|
|
int32_t cnt,
|
|
UnicodeString& appendTo,
|
|
FieldPosition& pos,
|
|
UErrorCode& status) const
|
|
{
|
|
if(cnt < 0) {
|
|
status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return appendTo;
|
|
}
|
|
if (msgPattern.countParts() == 0) {
|
|
status = U_INVALID_STATE_ERROR;
|
|
return appendTo;
|
|
}
|
|
|
|
for (int32_t i = 0; i < cnt; i++) {
|
|
double objDouble = objs[i].getDouble(status);
|
|
if (U_SUCCESS(status)) {
|
|
format(objDouble, appendTo, pos);
|
|
}
|
|
}
|
|
|
|
return appendTo;
|
|
}
|
|
|
|
// -------------------------------------
|
|
|
|
void
|
|
ChoiceFormat::parse(const UnicodeString& text,
|
|
Formattable& result,
|
|
ParsePosition& pos) const
|
|
{
|
|
result.setDouble(parseArgument(msgPattern, 0, text, pos));
|
|
}
|
|
|
|
double
|
|
ChoiceFormat::parseArgument(
|
|
const MessagePattern &pattern, int32_t partIndex,
|
|
const UnicodeString &source, ParsePosition &pos) {
|
|
// find the best number (defined as the one with the longest parse)
|
|
int32_t start = pos.getIndex();
|
|
int32_t furthest = start;
|
|
double bestNumber = uprv_getNaN();
|
|
double tempNumber = 0.0;
|
|
int32_t count = pattern.countParts();
|
|
while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
|
|
tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
|
|
partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR
|
|
int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
|
|
int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
|
|
if (len >= 0) {
|
|
int32_t newIndex = start + len;
|
|
if (newIndex > furthest) {
|
|
furthest = newIndex;
|
|
bestNumber = tempNumber;
|
|
if (furthest == source.length()) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
partIndex = msgLimit + 1;
|
|
}
|
|
if (furthest == start) {
|
|
pos.setErrorIndex(start);
|
|
} else {
|
|
pos.setIndex(furthest);
|
|
}
|
|
return bestNumber;
|
|
}
|
|
|
|
// Matches the literal text of one message against source.
//
// pattern        - the parsed pattern
// partIndex      - index of the message's start part
// limitPartIndex - index of the message's limit part
// source         - the text to match against
// sourceOffset   - index in source where the match must begin
// Returns the number of source code units matched, or -1 on a mismatch.
//
// The message text is compared in segments: each segment runs from the end
// of the previous part of interest to the start of the next SKIP_SYNTAX
// part (whose pattern text is skipped) or of the limit part.
int32_t
ChoiceFormat::matchStringUntilLimitPart(
        const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
        const UnicodeString &source, int32_t sourceOffset) {
    int32_t matchingSourceLength = 0;
    const UnicodeString &msgString = pattern.getPatternString();
    int32_t prevIndex = pattern.getPart(partIndex).getLimit();
    for (;;) {
        const MessagePattern::Part &part = pattern.getPart(++partIndex);
        if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
            int32_t index = part.getIndex();
            int32_t length = index - prevIndex;
            // Compare this segment at the position just past what has been
            // matched so far.  Each segment consumes source text, so later
            // segments must not be compared at the initial offset again.
            if (length != 0 &&
                    0 != source.compare(sourceOffset + matchingSourceLength, length,
                                        msgString, prevIndex, length)) {
                return -1;  // mismatch
            }
            matchingSourceLength += length;
            if (partIndex == limitPartIndex) {
                return matchingSourceLength;
            }
            prevIndex = part.getLimit();  // SKIP_SYNTAX
        }
    }
}
|
|
|
|
// -------------------------------------
|
|
|
|
// Returns a newly allocated copy of this object, created with the copy
// constructor.
// NOTE(review): presumably the caller takes ownership of the returned
// pointer - confirm against the Format::clone() contract.
Format*
ChoiceFormat::clone() const
{
    return new ChoiceFormat(*this);
}
|
|
|
|
U_NAMESPACE_END
|
|
|
|
#endif /* #if !UCONFIG_NO_FORMATTING */
|
|
|
|
//eof
|