GH #713: Improved support for producing Canonical XML in XMLWriter

This commit is contained in:
Guenter Obiltschnig 2016-02-28 11:20:02 +01:00
parent 317ef6df38
commit 2c6a74c4f5
4 changed files with 262 additions and 3 deletions

View File

@ -73,6 +73,9 @@ public:
/// - do not write an XML declaration
/// - do not use special empty element syntax
/// - set the New Line character to NEWLINE_LF
/// - write namespace declarations and attributes
/// in canonical order
/// - use default namespace as much as possible
WRITE_XML_DECLARATION = 0x02,
/// Write an XML declaration.
@ -279,8 +282,10 @@ public:
protected:
typedef std::map<XMLString, XMLString> AttributeMap;
typedef std::map<XMLString, std::pair<XMLString, XMLString> > CanonicalAttributeMap;
/// Attribute map used when writing canonical XML. The key
/// serves only as sort key; the mapped pair holds the name
/// that is actually written (first) and the attribute
/// value (second).
void writeStartElement(const XMLString& namespaceURI, const XMLString& localName, const XMLString& qname, const Attributes& attributes);
void writeCanonicalStartElement(const XMLString& namespaceURI, const XMLString& localName, const XMLString& qname, const Attributes& attributes);
/// Writes the start tag of an element when the CANONICAL_XML
/// option is set. Namespace declarations are written first,
/// followed by the remaining attributes; each group is
/// written in the order of its CanonicalAttributeMap keys.
/// The start tag is left unclosed.
void writeEndElement(const XMLString& namespaceURI, const XMLString& localName, const XMLString& qname);
void writeMarkup(const std::string& str) const;
void writeXML(const XMLString& str) const;
@ -291,10 +296,14 @@ protected:
void writeName(const XMLString& prefix, const XMLString& localName);
void writeXMLDeclaration();
void closeStartTag();
void declareNamespaces(const XMLString& namespaceURI, const XMLString& localName, const XMLString& qname, const Attributes& attributes);
/// Collects the namespaces used by the element and its
/// attributes and declares a prefix for every namespace
/// that is not yet mapped. A namespace becomes the default
/// namespace if no default namespace is in use by the
/// element; otherwise a unique prefix is generated.
void declareAttributeNamespaces(const Attributes& attributes);
void addNamespaceAttributes(AttributeMap& attributeMap);
void addNamespaceAttributes(CanonicalAttributeMap& attributeMap);
/// Adds an "xmlns" or "xmlns:<prefix>" entry to attributeMap
/// for every prefix reported by getDeclaredPrefixes().
void addAttributes(AttributeMap& attributeMap, const Attributes& attributes, const XMLString& elementNamespaceURI);
void addAttributes(CanonicalAttributeMap& attributeMap, const Attributes& attributes, const XMLString& elementNamespaceURI);
/// Adds all given attributes to attributeMap. Attributes
/// having a local name are keyed by "<namespaceURI>:<localName>"
/// (or just the local name if there is no namespace URI);
/// all others are keyed by their qualified name.
void writeAttributes(const AttributeMap& attributeMap);
void writeAttributes(const CanonicalAttributeMap& attributeMap);
/// Writes all attributes in attributeMap in key order,
/// escaping markup and whitespace characters in the values.
/// Throws an XMLException if a value contains any other
/// character below 32.
void prettyPrint() const;
static std::string nameToString(const XMLString& localName, const XMLString& qname);

View File

@ -257,7 +257,10 @@ void XMLWriter::startElement(const XMLString& namespaceURI, const XMLString& loc
if (_unclosedStartTag) closeStartTag();
prettyPrint();
writeStartElement(namespaceURI, localName, qname, attributes);
if (_options & CANONICAL_XML)
writeCanonicalStartElement(namespaceURI, localName, qname, attributes);
else
writeStartElement(namespaceURI, localName, qname, attributes);
_elementStack.push_back(Name(qname, namespaceURI, localName));
_contentWritten = false;
++_depth;
@ -296,7 +299,10 @@ void XMLWriter::emptyElement(const XMLString& namespaceURI, const XMLString& loc
if (_unclosedStartTag) closeStartTag();
prettyPrint();
writeStartElement(namespaceURI, localName, qname, attributes);
if (_options & CANONICAL_XML)
writeCanonicalStartElement(namespaceURI, localName, qname, attributes);
else
writeStartElement(namespaceURI, localName, qname, attributes);
_contentWritten = false;
writeMarkup("/");
closeStartTag();
@ -633,6 +639,36 @@ void XMLWriter::writeStartElement(const XMLString& namespaceURI, const XMLString
}
void XMLWriter::writeCanonicalStartElement(const XMLString& namespaceURI, const XMLString& localName, const XMLString& qname, const Attributes& attributes)
{
if (!_nsContextPushed)
_namespaces.pushContext();
_nsContextPushed = false;
++_elementCount;
declareNamespaces(namespaceURI, localName, qname, attributes);
writeMarkup(MARKUP_LT);
if (!localName.empty())
{
writeName(_namespaces.getPrefix(namespaceURI), localName);
}
else if (namespaceURI.empty() && !qname.empty())
{
writeXML(qname);
}
else throw XMLException("Tag mismatch", nameToString(localName, qname));
CanonicalAttributeMap namespaceAttributeMap;
addNamespaceAttributes(namespaceAttributeMap);
writeAttributes(namespaceAttributeMap);
CanonicalAttributeMap attributeMap;
addAttributes(attributeMap, attributes, namespaceURI);
writeAttributes(attributeMap);
_unclosedStartTag = true;
}
void XMLWriter::writeEndElement(const XMLString& namespaceURI, const XMLString& localName, const XMLString& qname)
{
if (_unclosedStartTag && !(_options & CANONICAL_XML))
@ -670,6 +706,72 @@ void XMLWriter::closeStartTag()
}
void XMLWriter::declareNamespaces(const XMLString& namespaceURI, const XMLString& localName, const XMLString& qname, const Attributes& attributes)
{
std::map<XMLString, std::set<XMLString> > usedNamespaces;
bool defaultNameSpaceUsed = false;
XMLString defaultNamespaceURI = _namespaces.getURI(std::string());
XMLString local;
XMLString prefix;
XMLString elementNamespaceURI = namespaceURI;
Name::split(qname, prefix, local);
if (elementNamespaceURI.empty())
elementNamespaceURI = _namespaces.getURI(prefix);
if (!elementNamespaceURI.empty())
{
usedNamespaces[prefix].insert(elementNamespaceURI);
if (!defaultNamespaceURI.empty() && elementNamespaceURI == defaultNamespaceURI)
defaultNameSpaceUsed = true;
}
for (int i = 0; i < attributes.getLength(); i++)
{
XMLString attributeNamespaceURI = attributes.getURI(i);
XMLString attributeLocalName = attributes.getLocalName(i);
XMLString attributeQName = attributes.getQName(i);
XMLString attributePrefix;
XMLString attributeLocal;
Name::split(attributeQName, attributePrefix, attributeLocal);
if (attributeNamespaceURI.empty())
attributeNamespaceURI = _namespaces.getURI(prefix);
if (!attributeNamespaceURI.empty())
{
usedNamespaces[attributePrefix].insert(attributeNamespaceURI);
defaultNameSpaceUsed = defaultNameSpaceUsed || (!defaultNamespaceURI.empty() && attributeNamespaceURI == defaultNamespaceURI);
}
}
for (std::map<XMLString, std::set<XMLString> >::const_iterator it = usedNamespaces.begin(); it != usedNamespaces.end(); ++it)
{
const std::set<XMLString> namespaceURIs = it->second;
for (std::set<XMLString>::const_iterator itURI = namespaceURIs.begin(); itURI != namespaceURIs.end(); ++itURI)
{
XMLString prefix = it->first;
if (prefix.empty())
prefix = _namespaces.getPrefix(*itURI);
if (prefix.empty() && !_namespaces.isMapped(*itURI))
{
if (defaultNameSpaceUsed)
{
if (*itURI != defaultNamespaceURI)
prefix = uniquePrefix();
}
else
{
defaultNamespaceURI = *itURI;
defaultNameSpaceUsed = true;
}
}
const XMLString& uri = _namespaces.getURI(prefix);
if ((uri.empty() || uri != *itURI) && !itURI->empty())
{
_namespaces.declarePrefix(prefix, *itURI);
}
}
}
}
void XMLWriter::declareAttributeNamespaces(const Attributes& attributes)
{
for (int i = 0; i < attributes.getLength(); i++)
@ -719,6 +821,26 @@ void XMLWriter::addNamespaceAttributes(AttributeMap& attributeMap)
}
void XMLWriter::addNamespaceAttributes(CanonicalAttributeMap& attributeMap)
{
	// Adds one namespace declaration attribute ("xmlns" for the empty
	// prefix, "xmlns:<prefix>" otherwise) per prefix reported by
	// getDeclaredPrefixes(). The attribute name doubles as sort key.
	NamespaceSupport::PrefixSet declared;
	_namespaces.getDeclaredPrefixes(declared);

	NamespaceSupport::PrefixSet::const_iterator itPrefix = declared.begin();
	while (itPrefix != declared.end())
	{
		const XMLString nsPrefix = *itPrefix;
		const XMLString nsURI = _namespaces.getURI(nsPrefix);

		XMLString attrName = NamespaceSupport::XMLNS_NAMESPACE_PREFIX;
		if (!nsPrefix.empty())
		{
			attrName += toXMLString(MARKUP_COLON);
			attrName += nsPrefix;
		}

		// insert() keeps an already existing entry untouched.
		attributeMap.insert(CanonicalAttributeMap::value_type(attrName, std::make_pair(attrName, nsURI)));
		++itPrefix;
	}
}
void XMLWriter::addAttributes(AttributeMap& attributeMap, const Attributes& attributes, const XMLString& elementNamespaceURI)
{
for (int i = 0; i < attributes.getLength(); i++)
@ -744,6 +866,38 @@ void XMLWriter::addAttributes(AttributeMap& attributeMap, const Attributes& attr
}
void XMLWriter::addAttributes(CanonicalAttributeMap& attributeMap, const Attributes& attributes, const XMLString& elementNamespaceURI)
{
	// Adds every attribute to attributeMap. For attributes having a
	// local name, the sort key is "<namespaceURI>:<localName>" (or just
	// the local name without a namespace URI) and the written name is
	// "<prefix>:<localName>" (or just the local name if no prefix is
	// found). Attributes without a local name use their qname for both.
	// elementNamespaceURI is not used here.
	const int count = attributes.getLength();
	for (int idx = 0; idx < count; ++idx)
	{
		const XMLString attrURI = attributes.getURI(idx);
		const XMLString attrLocal = attributes.getLocalName(idx);
		XMLString writtenName = attributes.getQName(idx);
		XMLString sortKey = writtenName;

		if (!attrLocal.empty())
		{
			sortKey.clear();
			writtenName.clear();
			if (!attrURI.empty())
			{
				const XMLString attrPrefix = _namespaces.getPrefix(attrURI);
				sortKey = attrURI;
				sortKey += toXMLString(MARKUP_COLON);
				if (!attrPrefix.empty())
				{
					writtenName = attrPrefix;
					writtenName += toXMLString(MARKUP_COLON);
				}
			}
			writtenName += attrLocal;
			sortKey += attrLocal;
		}

		// insert() keeps an already existing entry untouched.
		attributeMap.insert(CanonicalAttributeMap::value_type(sortKey, std::make_pair(writtenName, attributes.getValue(idx))));
	}
}
void XMLWriter::writeAttributes(const AttributeMap& attributeMap)
{
for (AttributeMap::const_iterator it = attributeMap.begin(); it != attributeMap.end(); ++it)
@ -783,6 +937,45 @@ void XMLWriter::writeAttributes(const AttributeMap& attributeMap)
}
void XMLWriter::writeAttributes(const CanonicalAttributeMap& attributeMap)
{
for (CanonicalAttributeMap::const_iterator it = attributeMap.begin(); it != attributeMap.end(); ++it)
{
if ((_options & PRETTY_PRINT) && (_options & PRETTY_PRINT_ATTRIBUTES))
{
writeNewLine();
writeIndent(_depth + 1);
}
else
{
writeMarkup(MARKUP_SPACE);
}
writeXML(it->second.first);
writeMarkup(MARKUP_EQQUOT);
for (XMLString::const_iterator itc = it->second.second.begin(); itc != it->second.second.end(); ++itc)
{
XMLChar c = *itc;
switch (c)
{
case '"': writeMarkup(MARKUP_QUOTENC); break;
case '&': writeMarkup(MARKUP_AMPENC); break;
case '<': writeMarkup(MARKUP_LTENC); break;
case '>': writeMarkup(MARKUP_GTENC); break;
case '\t': writeMarkup(MARKUP_TABENC); break;
case '\r': writeMarkup(MARKUP_CRENC); break;
case '\n': writeMarkup(MARKUP_LFENC); break;
default:
if (c >= 0 && c < 32)
throw XMLException("Invalid character token.");
else
writeXML(c);
}
}
writeMarkup(MARKUP_QUOT);
}
}
void XMLWriter::writeMarkup(const std::string& str) const
{
#if defined(XML_UNICODE_WCHAR_T)

View File

@ -472,6 +472,21 @@ void XMLWriterTest::testNamespaces()
assert (xml == "<ns1:r xmlns:ns1=\"urn:ns\">data</ns1:r>");
}
void XMLWriterTest::testNamespacesCanonical()
{
	// An element given only by namespace URI and local name must be
	// written using the default namespace in canonical mode.
	const std::string ns("urn:ns");
	const std::string root("r");
	const std::string noQName;

	std::ostringstream ostr;
	XMLWriter writer(ostr, XMLWriter::CANONICAL_XML);

	writer.startDocument();
	writer.startElement(ns, root, noQName);
	writer.characters("data");
	writer.endElement(ns, root, noQName);
	writer.endDocument();

	std::string xml = ostr.str();
	assert (xml == "<r xmlns=\"urn:ns\">data</r>");
}
void XMLWriterTest::testAttributeNamespaces()
{
std::ostringstream str;
@ -489,6 +504,23 @@ void XMLWriterTest::testAttributeNamespaces()
}
void XMLWriterTest::testAttributeNamespacesCanonical()
{
std::ostringstream str;
XMLWriter writer(str, XMLWriter::CANONICAL_XML);
Poco::XML::AttributesImpl attrs;
attrs.addAttribute("urn:other", "myattr", "", "", "attrValue");
attrs.addAttribute("urn:ns", "myattr2", "", "", "attrValue2");
writer.startDocument();
writer.startElement("urn:ns", "r", "", attrs);
writer.characters("data");
writer.endElement("urn:ns", "r", "");
writer.endDocument();
std::string xml = str.str();
assert (xml == "<r xmlns=\"urn:ns\" xmlns:ns1=\"urn:other\" myattr2=\"attrValue2\" ns1:myattr=\"attrValue\">data</r>");
}
void XMLWriterTest::testNamespacesNested()
{
std::ostringstream str;
@ -506,6 +538,25 @@ void XMLWriterTest::testNamespacesNested()
}
void XMLWriterTest::testNamespacesNestedCanonical()
{
std::ostringstream str;
XMLWriter writer(str, XMLWriter::CANONICAL_XML);
writer.startDocument();
writer.startElement("urn:ns1", "r", "");
writer.startElement("urn:ns1", "e", "");
writer.endElement("urn:ns1", "e", "");
Poco::XML::AttributesImpl attrs;
attrs.addAttribute("urn:ns1", "myattr", "myattr", "", "attrValue");
writer.startElement("urn:ns2", "f", "", attrs);
writer.endElement("urn:ns2", "f", "");
writer.endElement("urn:ns1", "r", "");
writer.endDocument();
std::string xml = str.str();
assert (xml == "<r xmlns=\"urn:ns1\"><e></e><ns1:f xmlns:ns1=\"urn:ns2\" myattr=\"attrValue\"></ns1:f></r>");
}
void XMLWriterTest::testExplicitNamespaces()
{
std::ostringstream str;
@ -639,8 +690,11 @@ CppUnit::Test* XMLWriterTest::suite()
CppUnit_addTest(pSuite, XMLWriterTest, testQNamespaces);
CppUnit_addTest(pSuite, XMLWriterTest, testQNamespacesNested);
CppUnit_addTest(pSuite, XMLWriterTest, testNamespaces);
CppUnit_addTest(pSuite, XMLWriterTest, testNamespacesCanonical);
CppUnit_addTest(pSuite, XMLWriterTest, testAttributeNamespaces);
CppUnit_addTest(pSuite, XMLWriterTest, testAttributeNamespacesCanonical);
CppUnit_addTest(pSuite, XMLWriterTest, testNamespacesNested);
CppUnit_addTest(pSuite, XMLWriterTest, testNamespacesNestedCanonical);
CppUnit_addTest(pSuite, XMLWriterTest, testExplicitNamespaces);
CppUnit_addTest(pSuite, XMLWriterTest, testWellformed);
CppUnit_addTest(pSuite, XMLWriterTest, testWellformedNested);

View File

@ -54,12 +54,15 @@ public:
void testQNamespaces();
void testQNamespacesNested();
void testNamespaces();
void testNamespacesCanonical();
void testAttributeNamespaces();
void testAttributeNamespacesCanonical();
void testNamespacesNested();
void testNamespacesNestedCanonical();
void testExplicitNamespaces();
void testWellformed();
void testWellformedNested();
void testWellformedNamespace();
void testAttributeNamespaces();
void testEmpty();
void setUp();