From 2c6a74c4f509de45f7532f871233aa90fd74036c Mon Sep 17 00:00:00 2001 From: Guenter Obiltschnig Date: Sun, 28 Feb 2016 11:20:02 +0100 Subject: [PATCH] GH #713: Improved support for producing Canonical XML in XMLWriter --- XML/include/Poco/XML/XMLWriter.h | 9 ++ XML/src/XMLWriter.cpp | 197 +++++++++++++++++++++++++++- XML/testsuite/src/XMLWriterTest.cpp | 54 ++++++++ XML/testsuite/src/XMLWriterTest.h | 5 +- 4 files changed, 262 insertions(+), 3 deletions(-) diff --git a/XML/include/Poco/XML/XMLWriter.h b/XML/include/Poco/XML/XMLWriter.h index 2bdaae87b..cc9892a5c 100644 --- a/XML/include/Poco/XML/XMLWriter.h +++ b/XML/include/Poco/XML/XMLWriter.h @@ -73,6 +73,9 @@ public: /// - do not write an XML declaration /// - do not use special empty element syntax /// - set the New Line character to NEWLINE_LF + /// - write namespace declarations and attributes + /// in canonical order + /// - use default namespace as much as possible WRITE_XML_DECLARATION = 0x02, /// Write an XML declaration. @@ -279,8 +282,10 @@ public: protected: typedef std::map AttributeMap; + typedef std::map > CanonicalAttributeMap; void writeStartElement(const XMLString& namespaceURI, const XMLString& localName, const XMLString& qname, const Attributes& attributes); + void writeCanonicalStartElement(const XMLString& namespaceURI, const XMLString& localName, const XMLString& qname, const Attributes& attributes); void writeEndElement(const XMLString& namespaceURI, const XMLString& localName, const XMLString& qname); void writeMarkup(const std::string& str) const; void writeXML(const XMLString& str) const; @@ -291,10 +296,14 @@ protected: void writeName(const XMLString& prefix, const XMLString& localName); void writeXMLDeclaration(); void closeStartTag(); + void declareNamespaces(const XMLString& namespaceURI, const XMLString& localName, const XMLString& qname, const Attributes& attributes); void declareAttributeNamespaces(const Attributes& attributes); void addNamespaceAttributes(AttributeMap& attributeMap); + void addNamespaceAttributes(CanonicalAttributeMap& attributeMap); void addAttributes(AttributeMap& attributeMap, const Attributes& attributes, const XMLString& elementNamespaceURI); + void addAttributes(CanonicalAttributeMap& attributeMap, const Attributes& attributes, const XMLString& elementNamespaceURI); void writeAttributes(const AttributeMap& attributeMap); + void writeAttributes(const CanonicalAttributeMap& attributeMap); void prettyPrint() const; static std::string nameToString(const XMLString& localName, const XMLString& qname); diff --git a/XML/src/XMLWriter.cpp b/XML/src/XMLWriter.cpp index d0aaefa4f..5626d7462 100644 --- a/XML/src/XMLWriter.cpp +++ b/XML/src/XMLWriter.cpp @@ -257,7 +257,10 @@ void XMLWriter::startElement(const XMLString& namespaceURI, const XMLString& loc if (_unclosedStartTag) closeStartTag(); prettyPrint(); - writeStartElement(namespaceURI, localName, qname, attributes); + if (_options & CANONICAL_XML) + writeCanonicalStartElement(namespaceURI, localName, qname, attributes); + else + writeStartElement(namespaceURI, localName, qname, attributes); _elementStack.push_back(Name(qname, namespaceURI, localName)); _contentWritten = false; ++_depth; @@ -296,7 +299,10 @@ void XMLWriter::emptyElement(const XMLString& namespaceURI, const XMLString& loc if (_unclosedStartTag) closeStartTag(); prettyPrint(); - writeStartElement(namespaceURI, localName, qname, attributes); + if (_options & CANONICAL_XML) + writeCanonicalStartElement(namespaceURI, localName, qname, attributes); + else + writeStartElement(namespaceURI, localName, qname, attributes); _contentWritten = false; writeMarkup("/"); closeStartTag(); @@ -633,6 +639,36 @@ void XMLWriter::writeStartElement(const XMLString& namespaceURI, const XMLString } +void XMLWriter::writeCanonicalStartElement(const XMLString& namespaceURI, const XMLString& localName, const XMLString& qname, const Attributes& attributes) +{ + if (!_nsContextPushed) + _namespaces.pushContext(); + _nsContextPushed = false; + ++_elementCount; + + declareNamespaces(namespaceURI, localName, qname, attributes); + + writeMarkup(MARKUP_LT); + if (!localName.empty()) + { + writeName(_namespaces.getPrefix(namespaceURI), localName); + } + else if (namespaceURI.empty() && !qname.empty()) + { + writeXML(qname); + } + else throw XMLException("Tag mismatch", nameToString(localName, qname)); + + CanonicalAttributeMap namespaceAttributeMap; + addNamespaceAttributes(namespaceAttributeMap); + writeAttributes(namespaceAttributeMap); + CanonicalAttributeMap attributeMap; + addAttributes(attributeMap, attributes, namespaceURI); + writeAttributes(attributeMap); + _unclosedStartTag = true; +} + + void XMLWriter::writeEndElement(const XMLString& namespaceURI, const XMLString& localName, const XMLString& qname) { if (_unclosedStartTag && !(_options & CANONICAL_XML)) @@ -670,6 +706,72 @@ void XMLWriter::closeStartTag() } +void XMLWriter::declareNamespaces(const XMLString& namespaceURI, const XMLString& localName, const XMLString& qname, const Attributes& attributes) +{ + std::map > usedNamespaces; + bool defaultNameSpaceUsed = false; + XMLString defaultNamespaceURI = _namespaces.getURI(std::string()); + XMLString local; + XMLString prefix; + XMLString elementNamespaceURI = namespaceURI; + Name::split(qname, prefix, local); + if (elementNamespaceURI.empty()) + elementNamespaceURI = _namespaces.getURI(prefix); + if (!elementNamespaceURI.empty()) + { + usedNamespaces[prefix].insert(elementNamespaceURI); + if (!defaultNamespaceURI.empty() && elementNamespaceURI == defaultNamespaceURI) + defaultNameSpaceUsed = true; + } + for (int i = 0; i < attributes.getLength(); i++) + { + XMLString attributeNamespaceURI = attributes.getURI(i); + XMLString attributeLocalName = attributes.getLocalName(i); + XMLString attributeQName = attributes.getQName(i); + + XMLString attributePrefix; + XMLString attributeLocal; + Name::split(attributeQName, attributePrefix, attributeLocal); + if (attributeNamespaceURI.empty()) + attributeNamespaceURI = _namespaces.getURI(prefix); + if (!attributeNamespaceURI.empty()) + { + usedNamespaces[attributePrefix].insert(attributeNamespaceURI); + defaultNameSpaceUsed = defaultNameSpaceUsed || (!defaultNamespaceURI.empty() && attributeNamespaceURI == defaultNamespaceURI); + } + } + for (std::map >::const_iterator it = usedNamespaces.begin(); it != usedNamespaces.end(); ++it) + { + const std::set namespaceURIs = it->second; + for (std::set::const_iterator itURI = namespaceURIs.begin(); itURI != namespaceURIs.end(); ++itURI) + { + XMLString prefix = it->first; + if (prefix.empty()) + prefix = _namespaces.getPrefix(*itURI); + if (prefix.empty() && !_namespaces.isMapped(*itURI)) + { + if (defaultNameSpaceUsed) + { + if (*itURI != defaultNamespaceURI) + prefix = uniquePrefix(); + } + else + { + defaultNamespaceURI = *itURI; + defaultNameSpaceUsed = true; + } + + } + const XMLString& uri = _namespaces.getURI(prefix); + if ((uri.empty() || uri != *itURI) && !itURI->empty()) + { + _namespaces.declarePrefix(prefix, *itURI); + } + } + } +} + + void XMLWriter::declareAttributeNamespaces(const Attributes& attributes) { for (int i = 0; i < attributes.getLength(); i++) @@ -719,6 +821,26 @@ void XMLWriter::addNamespaceAttributes(AttributeMap& attributeMap) } +void XMLWriter::addNamespaceAttributes(CanonicalAttributeMap& attributeMap) +{ + NamespaceSupport::PrefixSet prefixes; + _namespaces.getDeclaredPrefixes(prefixes); + for (NamespaceSupport::PrefixSet::const_iterator it = prefixes.begin(); it != prefixes.end(); ++it) + { + XMLString prefix = *it; + XMLString uri = _namespaces.getURI(prefix); + XMLString qname = NamespaceSupport::XMLNS_NAMESPACE_PREFIX; + + if (!prefix.empty()) + { + qname.append(toXMLString(MARKUP_COLON)); + qname.append(prefix); + } + attributeMap.insert(std::make_pair(qname, std::make_pair(qname, uri))); + } +} + + void XMLWriter::addAttributes(AttributeMap& attributeMap, const Attributes& attributes, const XMLString& elementNamespaceURI) { for (int i = 0; i < attributes.getLength(); i++) @@ -744,6 +866,38 @@ void XMLWriter::addAttributes(AttributeMap& attributeMap, const Attributes& attr } +void XMLWriter::addAttributes(CanonicalAttributeMap& attributeMap, const Attributes& attributes, const XMLString& elementNamespaceURI) +{ + for (int i = 0; i < attributes.getLength(); i++) + { + XMLString namespaceURI = attributes.getURI(i); + XMLString localName = attributes.getLocalName(i); + XMLString qname = attributes.getQName(i); + XMLString fullQName = qname; + if (!localName.empty()) + { + XMLString prefix; + if (!namespaceURI.empty()) + { + prefix = _namespaces.getPrefix(namespaceURI); + fullQName = namespaceURI; + fullQName.append(toXMLString(MARKUP_COLON)); + } + else fullQName.clear(); + if (!prefix.empty()) + { + qname = prefix; + qname.append(toXMLString(MARKUP_COLON)); + } + else qname.clear(); + qname.append(localName); + fullQName.append(localName); + } + attributeMap.insert(std::make_pair(fullQName, std::make_pair(qname, attributes.getValue(i)))); + } +} + + void XMLWriter::writeAttributes(const AttributeMap& attributeMap) { for (AttributeMap::const_iterator it = attributeMap.begin(); it != attributeMap.end(); ++it) @@ -783,6 +937,45 @@ void XMLWriter::writeAttributes(const AttributeMap& attributeMap) } +void XMLWriter::writeAttributes(const CanonicalAttributeMap& attributeMap) +{ + for (CanonicalAttributeMap::const_iterator it = attributeMap.begin(); it != attributeMap.end(); ++it) + { + if ((_options & PRETTY_PRINT) && (_options & PRETTY_PRINT_ATTRIBUTES)) + { + writeNewLine(); + writeIndent(_depth + 1); + } + else + { + writeMarkup(MARKUP_SPACE); + } + writeXML(it->second.first); + writeMarkup(MARKUP_EQQUOT); + for (XMLString::const_iterator itc = it->second.second.begin(); itc != it->second.second.end(); ++itc) + { + XMLChar c = *itc; + switch (c) + { + case '"': writeMarkup(MARKUP_QUOTENC); break; + case '&': writeMarkup(MARKUP_AMPENC); break; + case '<': writeMarkup(MARKUP_LTENC); break; + case '>': writeMarkup(MARKUP_GTENC); break; + case '\t': writeMarkup(MARKUP_TABENC); break; + case '\r': writeMarkup(MARKUP_CRENC); break; + case '\n': writeMarkup(MARKUP_LFENC); break; + default: + if (c >= 0 && c < 32) + throw XMLException("Invalid character token."); + else + writeXML(c); + } + } + writeMarkup(MARKUP_QUOT); + } +} + + void XMLWriter::writeMarkup(const std::string& str) const { #if defined(XML_UNICODE_WCHAR_T) diff --git a/XML/testsuite/src/XMLWriterTest.cpp b/XML/testsuite/src/XMLWriterTest.cpp index 10667d039..0cb59b3d6 100644 --- a/XML/testsuite/src/XMLWriterTest.cpp +++ b/XML/testsuite/src/XMLWriterTest.cpp @@ -472,6 +472,21 @@ void XMLWriterTest::testNamespaces() assert (xml == "data"); } + +void XMLWriterTest::testNamespacesCanonical() +{ + std::ostringstream str; + XMLWriter writer(str, XMLWriter::CANONICAL_XML); + writer.startDocument(); + writer.startElement("urn:ns", "r", ""); + writer.characters("data"); + writer.endElement("urn:ns", "r", ""); + writer.endDocument(); + std::string xml = str.str(); + assert (xml == "data"); +} + + void XMLWriterTest::testAttributeNamespaces() { std::ostringstream str; @@ -489,6 +504,23 @@ void XMLWriterTest::testAttributeNamespaces() } +void XMLWriterTest::testAttributeNamespacesCanonical() +{ + std::ostringstream str; + XMLWriter writer(str, XMLWriter::CANONICAL_XML); + Poco::XML::AttributesImpl attrs; + attrs.addAttribute("urn:other", "myattr", "", "", "attrValue"); + attrs.addAttribute("urn:ns", "myattr2", "", "", "attrValue2"); + writer.startDocument(); + writer.startElement("urn:ns", "r", "", attrs); + writer.characters("data"); + writer.endElement("urn:ns", "r", ""); + writer.endDocument(); + std::string xml = str.str(); + assert (xml == "data"); +} + + void XMLWriterTest::testNamespacesNested() { std::ostringstream str; @@ -506,6 +538,25 @@ void XMLWriterTest::testNamespacesNested() } +void XMLWriterTest::testNamespacesNestedCanonical() +{ + std::ostringstream str; + XMLWriter writer(str, XMLWriter::CANONICAL_XML); + writer.startDocument(); + writer.startElement("urn:ns1", "r", ""); + writer.startElement("urn:ns1", "e", ""); + writer.endElement("urn:ns1", "e", ""); + Poco::XML::AttributesImpl attrs; + attrs.addAttribute("urn:ns1", "myattr", "myattr", "", "attrValue"); + writer.startElement("urn:ns2", "f", "", attrs); + writer.endElement("urn:ns2", "f", ""); + writer.endElement("urn:ns1", "r", ""); + writer.endDocument(); + std::string xml = str.str(); + assert (xml == ""); +} + + void XMLWriterTest::testExplicitNamespaces() { std::ostringstream str; @@ -639,8 +690,11 @@ CppUnit::Test* XMLWriterTest::suite() CppUnit_addTest(pSuite, XMLWriterTest, testQNamespaces); CppUnit_addTest(pSuite, XMLWriterTest, testQNamespacesNested); CppUnit_addTest(pSuite, XMLWriterTest, testNamespaces); + CppUnit_addTest(pSuite, XMLWriterTest, testNamespacesCanonical); CppUnit_addTest(pSuite, XMLWriterTest, testAttributeNamespaces); + CppUnit_addTest(pSuite, XMLWriterTest, testAttributeNamespacesCanonical); CppUnit_addTest(pSuite, XMLWriterTest, testNamespacesNested); + CppUnit_addTest(pSuite, XMLWriterTest, testNamespacesNestedCanonical); CppUnit_addTest(pSuite, XMLWriterTest, testExplicitNamespaces); CppUnit_addTest(pSuite, XMLWriterTest, testWellformed); CppUnit_addTest(pSuite, XMLWriterTest, testWellformedNested); diff --git a/XML/testsuite/src/XMLWriterTest.h b/XML/testsuite/src/XMLWriterTest.h index 65af24f83..67603ab39 100644 --- a/XML/testsuite/src/XMLWriterTest.h +++ b/XML/testsuite/src/XMLWriterTest.h @@ -54,12 +54,15 @@ public: void testQNamespaces(); void testQNamespacesNested(); void testNamespaces(); + void testNamespacesCanonical(); + void testAttributeNamespaces(); + void testAttributeNamespacesCanonical(); void testNamespacesNested(); + void testNamespacesNestedCanonical(); void testExplicitNamespaces(); void testWellformed(); void testWellformedNested(); void testWellformedNamespace(); - void testAttributeNamespaces(); void testEmpty(); void setUp();