fixed doc typos; changed maximum sequence length to 4

2025-10-17 11:05:03 +02:00 · 2018-02-13 21:32:17 +01:00
parent bdd0478ead
commit 6ed085f288
3 changed files with 60 additions and 62 deletions
--- a/Foundation/include/Poco/TextEncoding.h
+++ b/Foundation/include/Poco/TextEncoding.h
@@ -45,7 +45,7 @@ public:

 	enum
 	{
-		MAX_SEQUENCE_LENGTH = 6 /// The maximum character byte sequence length supported.
+		MAX_SEQUENCE_LENGTH = 4 /// The maximum character byte sequence length supported.
 	};

 	typedef int CharacterMap[256];
@@ -55,7 +55,7 @@ public:
 		/// If map[b] is -1, then the byte sequence is malformed.
 		/// If map[b] is -n, where n >= 2, then b is the first byte of an n-byte
 		/// sequence that encodes a single Unicode scalar value. Byte sequences up
-		/// to 6 bytes in length are supported.
+		/// to 4 bytes in length are supported.

 	virtual ~TextEncoding();
 		/// Destroys the encoding.
@@ -86,6 +86,7 @@ public:
 		///
 		/// The convert function must return the Unicode scalar value
 		/// represented by this byte sequence or -1 if the byte sequence is malformed.
+		///
 		/// The default implementation returns (int) bytes[0].

 	virtual	int queryConvert(const unsigned char* bytes, int length) const;
@@ -95,14 +96,15 @@ public:
 		///
 		/// The queryConvert function must return the Unicode scalar value
 		/// represented by this byte sequence or -1 if the byte sequence is malformed
-		/// or -n where n is number of bytes requested for the sequence, if lenght is 
+		/// or -n where n is number of bytes requested for the sequence, if length is
 		/// shorter than the sequence.
 		/// The length of the sequence might not be determined by the first byte,
 		/// in which case the conversion becomes an iterative process:
 		/// First call with length == 1 might return -2,
-		/// Then a second call with lenght == 2 might return -4
+		/// Then a second call with length == 2 might return -4
 		/// Eventually, the third call with length == 4 should return either a
 		/// Unicode scalar value, or -1 if the byte sequence is malformed.
+		///
 		/// The default implementation returns (int) bytes[0].

 	virtual int sequenceLength(const unsigned char* bytes, int length) const;
@@ -110,7 +112,7 @@ public:
 		/// by bytes. The length paramater should be greater or equal to the length of
 		/// the sequence.
 		///
-		/// The sequenceLength function must return the lenght of the sequence
+		/// The sequenceLength function must return the length of the sequence
 		/// represented by this byte sequence or a negative value -n if length is
 		/// shorter than the sequence, where n is the number of byte requested
 		/// to determine the length of the sequence.
@@ -118,7 +120,7 @@ public:
 		/// in which case the conversion becomes an iterative process as long as the
 		/// result is negative:
 		/// First call with length == 1 might return -2,
-		/// Then a second call with lenght == 2 might return -4
+		/// Then a second call with length == 2 might return -4
 		/// Eventually, the third call with length == 4 should return 4.
 		/// The default implementation returns 1.

--- a/Foundation/src/UTF8Encoding.cpp
+++ b/Foundation/src/UTF8Encoding.cpp
@@ -44,7 +44,7 @@ const TextEncoding::CharacterMap UTF8Encoding::_charMap =
 	/* c0 */	  -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,
 	/* d0 */	  -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,   -2,
 	/* e0 */	  -3,   -3,   -3,   -3,   -3,   -3,   -3,   -3,   -3,   -3,   -3,   -3,   -3,   -3,   -3,   -3,
-	/* f0 */	  -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -5,   -5,   -5,   -5,   -6,   -6,   -1,   -1, 
+	/* f0 */	  -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
 };


@@ -88,8 +88,6 @@ int UTF8Encoding::convert(const unsigned char* bytes) const

 	switch (n)
 	{
-	case -6:
-	case -5:
 	case -1:
 		return -1;
 	case -4:
@@ -165,8 +163,6 @@ int UTF8Encoding::queryConvert(const unsigned char* bytes, int length) const
 	{
 		switch (n)
 		{
-		case -6:
-		case -5:
 		case -1:
 			return -1;
 		case -4:
--- a/Foundation/testsuite/src/UTF8StringTest.cpp
+++ b/Foundation/testsuite/src/UTF8StringTest.cpp
@@ -79,7 +79,7 @@ void UTF8StringTest::testTransform()

 	// a mix of invalid sequences
 	std::string str = "\xC2\xE5\xF0\xF8\xE8\xED\xFB+-++";
-	assert ("???" == UTF8::toLower(str));
+	assert ("???+-++" == UTF8::toLower(str));
 }