mirror of
https://github.com/pocoproject/poco.git
synced 2025-03-03 12:58:03 +01:00
reduce maximum character sequence length to 4, in accordance with UTF-8
This commit is contained in:
parent
af09a02a34
commit
fab94585bf
@ -48,7 +48,7 @@ public:
|
||||
|
||||
enum
|
||||
{
|
||||
MAX_SEQUENCE_LENGTH = 6 /// The maximum character byte sequence length supported.
|
||||
MAX_SEQUENCE_LENGTH = 4 /// The maximum character byte sequence length supported.
|
||||
};
|
||||
|
||||
typedef int CharacterMap[256];
|
||||
@ -58,7 +58,7 @@ public:
|
||||
/// If map[b] is -1, then the byte sequence is malformed.
|
||||
/// If map[b] is -n, where n >= 2, then b is the first byte of an n-byte
|
||||
/// sequence that encodes a single Unicode scalar value. Byte sequences up
|
||||
/// to 6 bytes in length are supported.
|
||||
/// to 4 bytes in length are supported.
|
||||
|
||||
virtual ~TextEncoding();
|
||||
/// Destroys the encoding.
|
||||
@ -89,6 +89,7 @@ public:
|
||||
///
|
||||
/// The convert function must return the Unicode scalar value
|
||||
/// represented by this byte sequence or -1 if the byte sequence is malformed.
|
||||
///
|
||||
/// The default implementation returns (int) bytes[0].
|
||||
|
||||
virtual int queryConvert(const unsigned char* bytes, int length) const;
|
||||
@ -106,6 +107,7 @@ public:
|
||||
/// Then a second call with length == 2 might return -4
|
||||
/// Eventually, the third call with length == 4 should return either a
|
||||
/// Unicode scalar value, or -1 if the byte sequence is malformed.
|
||||
///
|
||||
/// The default implementation returns (int) bytes[0].
|
||||
|
||||
virtual int sequenceLength(const unsigned char* bytes, int length) const;
|
||||
|
@ -44,7 +44,7 @@ const TextEncoding::CharacterMap UTF8Encoding::_charMap =
|
||||
/* c0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
|
||||
/* d0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
|
||||
/* e0 */ -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
|
||||
/* f0 */ -4, -4, -4, -4, -4, -4, -4, -4, -5, -5, -5, -5, -6, -6, -1, -1,
|
||||
/* f0 */ -4, -4, -4, -4, -4, -4, -4, -4, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
};
|
||||
|
||||
|
||||
@ -88,8 +88,6 @@ int UTF8Encoding::convert(const unsigned char* bytes) const
|
||||
|
||||
switch (n)
|
||||
{
|
||||
case -6:
|
||||
case -5:
|
||||
case -1:
|
||||
return -1;
|
||||
case -4:
|
||||
@ -165,8 +163,6 @@ int UTF8Encoding::queryConvert(const unsigned char* bytes, int length) const
|
||||
{
|
||||
switch (n)
|
||||
{
|
||||
case -6:
|
||||
case -5:
|
||||
case -1:
|
||||
return -1;
|
||||
case -4:
|
||||
|
@ -79,7 +79,7 @@ void UTF8StringTest::testTransform()
|
||||
|
||||
// a mix of invalid sequences
|
||||
std::string str = "\xC2\xE5\xF0\xF8\xE8\xED\xFB+-++";
|
||||
assert ("???" == UTF8::toLower(str));
|
||||
assert ("???+-++" == UTF8::toLower(str));
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user