mirror of
https://github.com/pocoproject/poco.git
synced 2025-12-12 12:56:44 +01:00
feat(Foundation): Include utf8proc for enhanced Unicode support (#4710)
This commit is contained in:
@@ -20,7 +20,17 @@
|
||||
#include "Poco/UTF8Encoding.h"
|
||||
#include "Poco/NumberFormatter.h"
|
||||
#include "Poco/Ascii.h"
|
||||
#include "Poco/Buffer.h"
|
||||
#include "Poco/Exception.h"
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
|
||||
|
||||
#if defined(POCO_UNBUNDLED)
|
||||
#include <utf8proc.h>
|
||||
#else
|
||||
#include "utf8proc.h"
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(POCO_OS_FAMILY_WINDOWS)
|
||||
@@ -410,4 +420,52 @@ std::string UTF8::unescape(const std::string::const_iterator& begin, const std::
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
std::string doNormalize(const char* str, std::size_t size, utf8proc_option_t options)
|
||||
{
|
||||
utf8proc_ssize_t n = utf8proc_decompose_custom(reinterpret_cast<const utf8proc_uint8_t*>(str), size, NULL, 0, options, NULL, NULL);
|
||||
if (n < 0) throw Poco::RuntimeException("Normalization decompose failed"s, utf8proc_errmsg(n));
|
||||
|
||||
Poco::Buffer<utf8proc_int32_t> buffer(n + 1); // utf8proc_reencode() needs space for terminating NUL
|
||||
n = utf8proc_decompose_custom(reinterpret_cast<const utf8proc_uint8_t*>(str), size, buffer.begin(), n, options, NULL, NULL);
|
||||
if (n < 0) throw Poco::RuntimeException("Normalization decompose failed"s, utf8proc_errmsg(n));
|
||||
|
||||
n = utf8proc_reencode(buffer.begin(), n, options);
|
||||
if (n < 0) throw Poco::RuntimeException("Normalization reeencode failed"s, utf8proc_errmsg(n));
|
||||
|
||||
return std::string(reinterpret_cast<char*>(buffer.begin()), n);
|
||||
}
|
||||
|
||||
// Maps a UTF8::NormalizationForm to the matching combination of
// utf8proc option flags. Returns 0 for a form that is not one of
// the four handled values.
int formToOptions(UTF8::NormalizationForm form)
{
	const int decomposed = UTF8PROC_STABLE | UTF8PROC_DECOMPOSE;
	const int composed = UTF8PROC_STABLE | UTF8PROC_COMPOSE;

	if (form == UTF8::NORMALIZATION_FORM_D) return decomposed;
	if (form == UTF8::NORMALIZATION_FORM_C) return composed;

	// The compatibility (K) forms add UTF8PROC_COMPAT to the base flags.
	if (form == UTF8::NORMALIZATION_FORM_KD) return decomposed | UTF8PROC_COMPAT;
	if (form == UTF8::NORMALIZATION_FORM_KC) return composed | UTF8PROC_COMPAT;

	return 0;
}
|
||||
}
|
||||
|
||||
|
||||
std::string UTF8::normalize(const std::string& s, NormalizationForm form)
|
||||
{
|
||||
return doNormalize(s.data(), s.size(), static_cast<utf8proc_option_t>(formToOptions(form)));
|
||||
}
|
||||
|
||||
|
||||
std::string UTF8::normalize(const std::string::const_iterator& begin, const std::string::const_iterator& end, NormalizationForm form)
|
||||
{
|
||||
return doNormalize(&*begin, static_cast<std::size_t>(std::distance(begin, end)), static_cast<utf8proc_option_t>(formToOptions(form)));
|
||||
}
|
||||
|
||||
|
||||
} // namespace Poco
|
||||
|
||||
Reference in New Issue
Block a user