fixed doc typos; changed maximum sequence length to 4

This commit is contained in:
Günter Obiltschnig
2018-02-13 21:32:17 +01:00
parent bdd0478ead
commit 6ed085f288
3 changed files with 60 additions and 62 deletions

View File

@@ -45,7 +45,7 @@ public:
enum enum
{ {
MAX_SEQUENCE_LENGTH = 6 /// The maximum character byte sequence length supported. MAX_SEQUENCE_LENGTH = 4 /// The maximum character byte sequence length supported.
}; };
typedef int CharacterMap[256]; typedef int CharacterMap[256];
@@ -55,7 +55,7 @@ public:
/// If map[b] is -1, then the byte sequence is malformed. /// If map[b] is -1, then the byte sequence is malformed.
/// If map[b] is -n, where n >= 2, then b is the first byte of an n-byte /// If map[b] is -n, where n >= 2, then b is the first byte of an n-byte
/// sequence that encodes a single Unicode scalar value. Byte sequences up /// sequence that encodes a single Unicode scalar value. Byte sequences up
/// to 6 bytes in length are supported. /// to 4 bytes in length are supported.
virtual ~TextEncoding(); virtual ~TextEncoding();
/// Destroys the encoding. /// Destroys the encoding.
@@ -86,6 +86,7 @@ public:
/// ///
/// The convert function must return the Unicode scalar value /// The convert function must return the Unicode scalar value
/// represented by this byte sequence or -1 if the byte sequence is malformed. /// represented by this byte sequence or -1 if the byte sequence is malformed.
///
/// The default implementation returns (int) bytes[0]. /// The default implementation returns (int) bytes[0].
virtual int queryConvert(const unsigned char* bytes, int length) const; virtual int queryConvert(const unsigned char* bytes, int length) const;
@@ -95,14 +96,15 @@ public:
/// ///
/// The queryConvert function must return the Unicode scalar value /// The queryConvert function must return the Unicode scalar value
/// represented by this byte sequence or -1 if the byte sequence is malformed /// represented by this byte sequence or -1 if the byte sequence is malformed
/// or -n where n is number of bytes requested for the sequence, if lenght is /// or -n where n is number of bytes requested for the sequence, if length is
/// shorter than the sequence. /// shorter than the sequence.
/// The length of the sequence might not be determined by the first byte, /// The length of the sequence might not be determined by the first byte,
/// in which case the conversion becomes an iterative process: /// in which case the conversion becomes an iterative process:
/// First call with length == 1 might return -2, /// First call with length == 1 might return -2,
/// Then a second call with lenght == 2 might return -4 /// Then a second call with length == 2 might return -4
/// Eventually, the third call with length == 4 should return either a /// Eventually, the third call with length == 4 should return either a
/// Unicode scalar value, or -1 if the byte sequence is malformed. /// Unicode scalar value, or -1 if the byte sequence is malformed.
///
/// The default implementation returns (int) bytes[0]. /// The default implementation returns (int) bytes[0].
virtual int sequenceLength(const unsigned char* bytes, int length) const; virtual int sequenceLength(const unsigned char* bytes, int length) const;
@@ -110,7 +112,7 @@ public:
/// by bytes. The length parameter should be greater or equal to the length of /// by bytes. The length parameter should be greater or equal to the length of
/// the sequence. /// the sequence.
/// ///
/// The sequenceLength function must return the lenght of the sequence /// The sequenceLength function must return the length of the sequence
/// represented by this byte sequence or a negative value -n if length is /// represented by this byte sequence or a negative value -n if length is
/// shorter than the sequence, where n is the number of bytes requested /// shorter than the sequence, where n is the number of bytes requested
/// to determine the length of the sequence. /// to determine the length of the sequence.
@@ -118,7 +120,7 @@ public:
/// in which case the conversion becomes an iterative process as long as the /// in which case the conversion becomes an iterative process as long as the
/// result is negative: /// result is negative:
/// First call with length == 1 might return -2, /// First call with length == 1 might return -2,
/// Then a second call with lenght == 2 might return -4 /// Then a second call with length == 2 might return -4
/// Eventually, the third call with length == 4 should return 4. /// Eventually, the third call with length == 4 should return 4.
/// The default implementation returns 1. /// The default implementation returns 1.

View File

@@ -44,7 +44,7 @@ const TextEncoding::CharacterMap UTF8Encoding::_charMap =
/* c0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, /* c0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* d0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, /* d0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* e0 */ -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, /* e0 */ -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
/* f0 */ -4, -4, -4, -4, -4, -4, -4, -4, -5, -5, -5, -5, -6, -6, -1, -1, /* f0 */ -4, -4, -4, -4, -4, -4, -4, -4, -1, -1, -1, -1, -1, -1, -1, -1,
}; };
@@ -88,8 +88,6 @@ int UTF8Encoding::convert(const unsigned char* bytes) const
switch (n) switch (n)
{ {
case -6:
case -5:
case -1: case -1:
return -1; return -1;
case -4: case -4:
@@ -165,8 +163,6 @@ int UTF8Encoding::queryConvert(const unsigned char* bytes, int length) const
{ {
switch (n) switch (n)
{ {
case -6:
case -5:
case -1: case -1:
return -1; return -1;
case -4: case -4:

View File

@@ -79,7 +79,7 @@ void UTF8StringTest::testTransform()
// a mix of invalid sequences // a mix of invalid sequences
std::string str = "\xC2\xE5\xF0\xF8\xE8\xED\xFB+-++"; std::string str = "\xC2\xE5\xF0\xF8\xE8\xED\xFB+-++";
assert ("???" == UTF8::toLower(str)); assert ("???+-++" == UTF8::toLower(str));
} }