trunk/branch integration: TextEncoding update

2025-06-30 16:03:27 +02:00 · 2011-08-22 18:21:40 +00:00 · 2011-08-22 18:21:40 +00:00 · d35ecf85a2
commit d35ecf85a2
parent 319a37b903
7 changed files with 50 additions and 4 deletions
--- a/Foundation/include/Poco/ASCIIEncoding.h
+++ b/Foundation/include/Poco/ASCIIEncoding.h
@ -1,7 +1,7 @@
 //
 // ASCIIEncoding.h
 //
-// $Id: //poco/svn/Foundation/include/Poco/ASCIIEncoding.h#2 $
+// $Id: //poco/1.4/Foundation/include/Poco/ASCIIEncoding.h#1 $
 //
 // Library: Foundation
 // Package: Text
@ -58,6 +58,8 @@ public:
 	const CharacterMap& characterMap() const;
 	int convert(const unsigned char* bytes) const;
 	int convert(int ch, unsigned char* bytes, int length) const;
+	int queryConvert(const unsigned char* bytes, int length) const;
+	int sequenceLength(const unsigned char* bytes, int length) const;
 	
 private:
 	static const char* _names[];
--- a/Foundation/include/Poco/Latin1Encoding.h
+++ b/Foundation/include/Poco/Latin1Encoding.h
@ -1,7 +1,7 @@
 //
 // Latin1Encoding.h
 //
-// $Id: //poco/svn/Foundation/include/Poco/Latin1Encoding.h#2 $
+// $Id: //poco/1.4/Foundation/include/Poco/Latin1Encoding.h#1 $
 //
 // Library: Foundation
 // Package: Text
@ -58,6 +58,8 @@ public:
 	const CharacterMap& characterMap() const;
 	int convert(const unsigned char* bytes) const;
 	int convert(int ch, unsigned char* bytes, int length) const;
+	int queryConvert(const unsigned char* bytes, int length) const;
+	int sequenceLength(const unsigned char* bytes, int length) const;
 	
 private:
 	static const char* _names[];
--- a/Foundation/include/Poco/Latin9Encoding.h
+++ b/Foundation/include/Poco/Latin9Encoding.h
@ -61,6 +61,8 @@ public:
 	const CharacterMap& characterMap() const;
 	int convert(const unsigned char* bytes) const;
 	int convert(int ch, unsigned char* bytes, int length) const;
+	int queryConvert(const unsigned char* bytes, int length) const;
+	int sequenceLength(const unsigned char* bytes, int length) const;
 	
 private:
 	static const char* _names[];
--- a/Foundation/include/Poco/TextEncoding.h
+++ b/Foundation/include/Poco/TextEncoding.h
@ -104,12 +104,46 @@ public:
 	virtual int convert(const unsigned char* bytes) const;
 		/// The convert function is used to convert multibyte sequences;
 		/// bytes will point to a byte sequence of n bytes where 
-		/// getCharacterMap()[*bytes] == -n.
+		/// sequenceLength(bytes, length) == n, with length >= n.
 		///
 		/// The convert function must return the Unicode scalar value
 		/// represented by this byte sequence or -1 if the byte sequence is malformed.
 		/// The default implementation returns (int) bytes[0].

+	virtual	int queryConvert(const unsigned char* bytes, int length) const;
+		/// The queryConvert function is used to convert single byte characters 
+		/// or multibyte sequences;
+		/// bytes will point to a byte sequence of length bytes.
+		///
+		/// The queryConvert function must return the Unicode scalar value
+		/// represented by this byte sequence or -1 if the byte sequence is malformed
+		/// or -n where n is the number of bytes requested for the sequence, if length is 
+		/// shorter than the sequence.
+		/// The length of the sequence might not be determined by the first byte, 
+		/// in which case the conversion becomes an iterative process:
+		/// the first call with length == 1 might return -2,
+		/// then a second call with length == 2 might return -4.
+		/// Eventually, the third call with length == 4 should return either a 
+		/// Unicode scalar value, or -1 if the byte sequence is malformed.
+		/// The default implementation returns (int) bytes[0].
+
+	virtual int sequenceLength(const unsigned char* bytes, int length) const;
+		/// The sequenceLength function is used to get the length of the sequence pointed
+		/// to by bytes. The length parameter should be greater than or equal to the length of 
+		/// the sequence.
+		///
+		/// The sequenceLength function must return the length of the sequence
+		/// represented by this byte sequence or a negative value -n if length is 
+		/// shorter than the sequence, where n is the number of bytes requested 
+		/// to determine the length of the sequence.
+		/// The length of the sequence might not be determined by the first byte, 
+		/// in which case determining the length becomes an iterative process as long as the 
+		/// result is negative:
+		/// the first call with length == 1 might return -2,
+		/// then a second call with length == 2 might return -4.
+		/// Eventually, the third call with length == 4 should return 4.
+		/// The default implementation returns 1.
+
 	virtual int convert(int ch, unsigned char* bytes, int length) const;
 		/// Transform the Unicode character ch into the encoding's 
 		/// byte sequence. The method returns the number of bytes
--- a/Foundation/include/Poco/UTF16Encoding.h
+++ b/Foundation/include/Poco/UTF16Encoding.h
@ -91,6 +91,8 @@ public:
 	const CharacterMap& characterMap() const;
 	int convert(const unsigned char* bytes) const;
 	int convert(int ch, unsigned char* bytes, int length) const;
+	int queryConvert(const unsigned char* bytes, int length) const;
+	int sequenceLength(const unsigned char* bytes, int length) const;
 	
 private:
 	bool _flipBytes;
--- a/Foundation/include/Poco/UTF8Encoding.h
+++ b/Foundation/include/Poco/UTF8Encoding.h
@ -58,7 +58,9 @@ public:
 	const CharacterMap& characterMap() const;
 	int convert(const unsigned char* bytes) const;
 	int convert(int ch, unsigned char* bytes, int length) const;
-	
+	int queryConvert(const unsigned char* bytes, int length) const;
+	int sequenceLength(const unsigned char* bytes, int length) const;
+
 	static bool isLegal(const unsigned char *bytes, int length);
 		/// Utility routine to tell whether a sequence of bytes is legal UTF-8.
 		/// This must be called with the length pre-determined by the first byte.
--- a/Foundation/include/Poco/Windows1252Encoding.h
+++ b/Foundation/include/Poco/Windows1252Encoding.h
@ -58,6 +58,8 @@ public:
 	const CharacterMap& characterMap() const;
 	int convert(const unsigned char* bytes) const;
 	int convert(int ch, unsigned char* bytes, int length) const;
+	int queryConvert(const unsigned char* bytes, int length) const;
+	int sequenceLength(const unsigned char* bytes, int length) const;
 	
 private:
 	static const char* _names[];