poco/Encodings/include/Poco/DoubleByteEncoding.h
2018-02-14 08:40:11 +01:00

123 lines
3.9 KiB
C++

//
// DoubleByteEncoding.h
//
// Library: Encodings
// Package: Encodings
// Module: DoubleByteEncoding
//
// Definition of the DoubleByteEncoding class.
//
// Copyright (c) 2018, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// SPDX-License-Identifier: BSL-1.0
//
#ifndef Encodings_DoubleByteEncoding_INCLUDED
#define Encodings_DoubleByteEncoding_INCLUDED
#include "Poco/Encodings.h"
#include "Poco/TextEncoding.h"
namespace Poco {
class Encodings_API DoubleByteEncoding: public TextEncoding
/// This abstract class is a base class for various double-byte character
/// set (DBCS) encodings.
///
/// Double-byte encodings are variants of multi-byte encodings
/// where each (Unicode) code point is represented by one or
/// two bytes. Unicode code points are restricted to the
/// Basic Multilingual Plane.
///
/// Subclasses must provide encoding names, a static CharacterMap, as well
/// as static Mapping and reverse Mapping tables, and provide these to the
/// DoubleByteEncoding constructor.
{
public:
struct Mapping
/// A single entry of a mapping or reverse-mapping table,
/// associating the 16-bit value from with the 16-bit value to.
///
/// In a mapping table, from is the encoded character.
/// NOTE(review): to is presumably the corresponding Unicode code
/// point -- confirm against the implementation of map().
///
/// In a reverse-mapping table, from is a Unicode code point and to
/// is the encoded one-byte or two-byte sequence.
{
Poco::UInt16 from;
Poco::UInt16 to;
};
// TextEncoding
//
// NOTE(review): the following member functions presumably override the
// virtual member functions of the same names declared in Poco::TextEncoding;
// see Poco/TextEncoding.h for their contracts. Their definitions are not
// part of this header.
const char* canonicalName() const;
bool isA(const std::string& encodingName) const;
const CharacterMap& characterMap() const;
int convert(const unsigned char* bytes) const;
int convert(int ch, unsigned char* bytes, int length) const;
int queryConvert(const unsigned char* bytes, int length) const;
int sequenceLength(const unsigned char* bytes, int length) const;
protected:
DoubleByteEncoding(const char** names, const TextEncoding::CharacterMap& charMap, const Mapping mappingTable[], std::size_t mappingTableSize, const Mapping reverseMappingTable[], std::size_t reverseMappingTableSize);
/// Creates a DoubleByteEncoding using the given mapping and reverse-mapping tables.
///
/// names must be a static array declared in the derived class,
/// containing the names of this encoding, declared as:
///
///     const char* MyEncoding::_names[] =
///     {
///         "myencoding",
///         "MyEncoding",
///         NULL
///     };
///
/// The first entry in names must be the canonical name.
///
/// charMap must be a static CharacterMap giving information about double-byte
/// character sequences.
///
/// For each mappingTable item, from must be a value in range 0x0100 to
/// 0xFFFF for double-byte mappings, with the most significant (upper) byte
/// representing the first character in the sequence and the lower byte
/// representing the second character in the sequence.
///
/// For each reverseMappingTable item, from must be a Unicode code point from the
/// Basic Multilingual Plane, and to is a one-byte or two-byte sequence.
/// As with mappingTable, a one-byte sequence is in range 0x00 to 0xFF, and a
/// two-byte sequence is in range 0x0100 to 0xFFFF.
///
/// Unicode code points are restricted to the Basic Multilingual Plane
/// (code points 0x0000 to 0xFFFF).
///
/// Items in both tables must be sorted by from, in ascending order.
///
/// NOTE(review): mappingTableSize and reverseMappingTableSize presumably
/// give the number of items in the respective table -- confirm against
/// the implementation.
~DoubleByteEncoding();
/// Destroys the DoubleByteEncoding.
int map(Poco::UInt16 encoded) const;
/// Maps a double-byte encoded character to its Unicode code point.
///
/// Returns the Unicode code point, or -1 if the encoded character is bad
/// and cannot be mapped.
int reverseMap(int cp) const;
/// Maps a Unicode code point to its double-byte representation.
///
/// Returns -1 if the code point cannot be mapped, otherwise
/// a value in range 0 to 0xFF for single-byte mappings, or
/// 0x0100 to 0xFFFF for double-byte mappings.
private:
DoubleByteEncoding();
/// Default constructor, declared private so that it is not
/// accessible to subclasses or users of the class; the protected
/// constructor taking names and tables must be used instead.

// NOTE(review): the members below presumably hold the arguments passed
// to the protected constructor (encoding names, character map, and the
// two tables with their sizes) -- the constructor definition is not
// visible in this header. _charMap is a reference and the tables are
// plain pointers, which is consistent with the requirement that
// subclasses supply static data.
const char** _names;
const TextEncoding::CharacterMap& _charMap;
const Mapping* _mappingTable;
const std::size_t _mappingTableSize;
const Mapping* _reverseMappingTable;
const std::size_t _reverseMappingTableSize;
};
} // namespace Poco
#endif // Encodings_DoubleByteEncoding_INCLUDED