mirror of
				https://github.com/pocoproject/poco.git
				synced 2025-10-25 18:22:59 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			84 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			84 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| //
 | |
| // UTF8Encoding.h
 | |
| //
 | |
| // $Id: //poco/1.4/Foundation/include/Poco/UTF8Encoding.h#1 $
 | |
| //
 | |
| // Library: Foundation
 | |
| // Package: Text
 | |
| // Module:  UTF8Encoding
 | |
| //
 | |
| // Definition of the UTF8Encoding class.
 | |
| //
 | |
| // Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.
 | |
| // and Contributors.
 | |
| //
 | |
| // Permission is hereby granted, free of charge, to any person or organization
 | |
| // obtaining a copy of the software and accompanying documentation covered by
 | |
| // this license (the "Software") to use, reproduce, display, distribute,
 | |
| // execute, and transmit the Software, and to prepare derivative works of the
 | |
| // Software, and to permit third-parties to whom the Software is furnished to
 | |
| // do so, all subject to the following:
 | |
| // 
 | |
| // The copyright notices in the Software and this entire statement, including
 | |
| // the above license grant, this restriction and the following disclaimer,
 | |
| // must be included in all copies of the Software, in whole or in part, and
 | |
| // all derivative works of the Software, unless such copies or derivative
 | |
| // works are solely in the form of machine-executable object code generated by
 | |
| // a source language processor.
 | |
| // 
 | |
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | |
| // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | |
| // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
 | |
| // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
 | |
| // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
 | |
| // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 | |
| // DEALINGS IN THE SOFTWARE.
 | |
| //
 | |
| 
 | |
| 
 | |
| #ifndef Foundation_UTF8Encoding_INCLUDED
 | |
| #define Foundation_UTF8Encoding_INCLUDED
 | |
| 
 | |
| 
 | |
| #include "Poco/Foundation.h"
 | |
| #include "Poco/TextEncoding.h"
 | |
| 
 | |
| 
 | |
| namespace Poco {
 | |
| 
 | |
| 
 | |
| class Foundation_API UTF8Encoding: public TextEncoding
 | |
| 	/// UTF-8 text encoding, as defined in RFC 2279. NOTE(review): RFC 2279 has been obsoleted by RFC 3629; confirm which one the implementation follows.
 | |
| {
 | |
| public:
 | |
| 	UTF8Encoding(); // Default constructor.
 | |
| 	~UTF8Encoding(); // Destructor.
 | |
| 	const char* canonicalName() const; // Returns the encoding's name as a C string; presumably a TextEncoding override - confirm in Poco/TextEncoding.h.
 | |
| 	bool isA(const std::string& encodingName) const; // Tests encodingName against this encoding; presumably matched against _names - confirm in the .cpp.
 | |
| 	const CharacterMap& characterMap() const; // Returns a CharacterMap by const reference; presumably _charMap - confirm in the .cpp.
 | |
| 	int convert(const unsigned char* bytes) const; // Decoding direction (bytes in, int out); meaning of the result is set by TextEncoding - TODO confirm.
 | |
| 	int convert(int ch, unsigned char* bytes, int length) const; // Encoding direction (ch in, bytes/length is the caller's buffer); meaning of the result - TODO confirm.
 | |
| 	int queryConvert(const unsigned char* bytes, int length) const; // Length-aware counterpart of convert(bytes); exact contract not visible here - TODO confirm.
 | |
| 	int sequenceLength(const unsigned char* bytes, int length) const; // Presumably the byte length of the sequence starting at bytes - confirm against TextEncoding.
 | |
| 
 | |
| 	static bool isLegal(const unsigned char *bytes, int length); // Static: callable without a UTF8Encoding instance.
 | |
| 		/// Utility routine to tell whether a sequence of bytes is legal UTF-8.
 | |
| 		/// This must be called with the length pre-determined by the first byte.
 | |
| 		/// The sequence is illegal right away if there aren't enough bytes 
 | |
| 		/// available. If presented with a length > 4, this function returns false.
 | |
| 		/// The Unicode definition of UTF-8 goes up to 4-byte sequences.
 | |
| 		/// 
 | |
| 		/// Adapted from ftp://ftp.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
 | |
| 		/// Copyright 2001-2004 Unicode, Inc.
 | |
| 
 | |
| private:
 | |
| 	static const char* _names[]; // Array of C strings, declared here without a bound; presumably the names accepted by isA() - confirm in the .cpp.
 | |
| 	static const CharacterMap _charMap; // Single CharacterMap shared by all instances (static const).
 | |
| };
 | |
| 
 | |
| 
 | |
| } // namespace Poco
 | |
| 
 | |
| 
 | |
| #endif // Foundation_UTF8Encoding_INCLUDED
 | 
