fixed doc typos; changed maximum sequence length to 4

This commit is contained in:
Günter Obiltschnig
2018-02-13 21:32:17 +01:00
parent bdd0478ead
commit 6ed085f288
3 changed files with 60 additions and 62 deletions

View File

@@ -45,7 +45,7 @@ public:
enum
{
MAX_SEQUENCE_LENGTH = 6 /// The maximum character byte sequence length supported.
MAX_SEQUENCE_LENGTH = 4 /// The maximum character byte sequence length supported.
};
typedef int CharacterMap[256];
@@ -55,7 +55,7 @@ public:
/// If map[b] is -1, then the byte sequence is malformed.
/// If map[b] is -n, where n >= 2, then b is the first byte of an n-byte
/// sequence that encodes a single Unicode scalar value. Byte sequences up
/// to 6 bytes in length are supported.
/// to 4 bytes in length are supported.
virtual ~TextEncoding();
/// Destroys the encoding.
@@ -86,6 +86,7 @@ public:
///
/// The convert function must return the Unicode scalar value
/// represented by this byte sequence or -1 if the byte sequence is malformed.
///
/// The default implementation returns (int) bytes[0].
virtual int queryConvert(const unsigned char* bytes, int length) const;
@@ -95,14 +96,15 @@ public:
///
/// The queryConvert function must return the Unicode scalar value
/// represented by this byte sequence or -1 if the byte sequence is malformed
/// or -n where n is number of bytes requested for the sequence, if lenght is
/// or -n where n is number of bytes requested for the sequence, if length is
/// shorter than the sequence.
/// The length of the sequence might not be determined by the first byte,
/// in which case the conversion becomes an iterative process:
/// First call with length == 1 might return -2,
/// Then a second call with lenght == 2 might return -4
/// Then a second call with length == 2 might return -4
/// Eventually, the third call with length == 4 should return either a
/// Unicode scalar value, or -1 if the byte sequence is malformed.
///
/// The default implementation returns (int) bytes[0].
virtual int sequenceLength(const unsigned char* bytes, int length) const;
@@ -110,7 +112,7 @@ public:
/// by bytes. The length parameter should be greater than or equal to the length of
/// the sequence.
///
/// The sequenceLength function must return the lenght of the sequence
/// The sequenceLength function must return the length of the sequence
/// represented by this byte sequence or a negative value -n if length is
/// shorter than the sequence, where n is the number of bytes requested
/// to determine the length of the sequence.
@@ -118,7 +120,7 @@ public:
/// in which case the conversion becomes an iterative process as long as the
/// result is negative:
/// First call with length == 1 might return -2,
/// Then a second call with lenght == 2 might return -4
/// Then a second call with length == 2 might return -4
/// Eventually, the third call with length == 4 should return 4.
/// The default implementation returns 1.

View File

@@ -44,7 +44,7 @@ const TextEncoding::CharacterMap UTF8Encoding::_charMap =
/* c0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* d0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* e0 */ -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
/* f0 */ -4, -4, -4, -4, -4, -4, -4, -4, -5, -5, -5, -5, -6, -6, -1, -1,
/* f0 */ -4, -4, -4, -4, -4, -4, -4, -4, -1, -1, -1, -1, -1, -1, -1, -1,
};
@@ -88,8 +88,6 @@ int UTF8Encoding::convert(const unsigned char* bytes) const
switch (n)
{
case -6:
case -5:
case -1:
return -1;
case -4:
@@ -165,8 +163,6 @@ int UTF8Encoding::queryConvert(const unsigned char* bytes, int length) const
{
switch (n)
{
case -6:
case -5:
case -1:
return -1;
case -4:

View File

@@ -79,7 +79,7 @@ void UTF8StringTest::testTransform()
// a mix of invalid sequences
std::string str = "\xC2\xE5\xF0\xF8\xE8\xED\xFB+-++";
assert ("???" == UTF8::toLower(str));
assert ("???+-++" == UTF8::toLower(str));
}