diff --git a/XML/Makefile b/XML/Makefile index 0d8991fc7..59a9a719b 100644 --- a/XML/Makefile +++ b/XML/Makefile @@ -19,10 +19,10 @@ objects = AbstractContainerNode AbstractNode Attr AttrMap Attributes \ EntityResolverImpl ErrorHandler Event EventDispatcher EventException \ EventListener EventTarget InputSource LexicalHandler Locator LocatorImpl \ MutationEvent Name NamePool NamedNodeMap NamespaceStrategy \ - NamespaceSupport Node NodeFilter NodeIterator NodeList Notation \ - ParserEngine ProcessingInstruction SAXException SAXParser Text \ + NamespaceSupport NodeAppender Node NodeFilter NodeIterator NodeList Notation \ + ParserEngine ProcessingInstruction QName SAXException SAXParser Text \ TreeWalker WhitespaceFilter XMLException XMLFilter XMLFilterImpl XMLReader \ - XMLString XMLWriter NodeAppender + XMLString XMLWriter XMLStreamParser XMLStreamParserException expat_objects = xmlparse xmlrole xmltok diff --git a/XML/include/Poco/XML/Content.h b/XML/include/Poco/XML/Content.h new file mode 100644 index 000000000..6a5630304 --- /dev/null +++ b/XML/include/Poco/XML/Content.h @@ -0,0 +1,60 @@ +// +// Content.h +// +// $Id$ +// +// Library: XML +// Package: XML +// Module: Content +// +// Definition of the Content enum. +// +// Copyright (c) 2004-2015, Applied Informatics Software Engineering GmbH. +// and Contributors. +// +// SPDX-License-Identifier: BSL-1.0 +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + + +#ifndef POCO_XML_CONTENT +#define POCO_XML_CONTENT + + +namespace Poco +{ +namespace XML +{ + + +/// XML content model. C++11 enum class emulated for C++98. 
+struct Content +{ + enum value + { + // element characters whitespaces notes + Empty, // no no ignored + Simple, // no yes preserved content accumulated + Complex, // yes no ignored + Mixed // yes yes preserved + }; + + Content(value v) + : v_(v) + { + } + + operator value() const + { + return v_; + } + +private: + value v_; +}; + + +} +} + +#endif // POCO_XML_CONTENT diff --git a/XML/include/Poco/XML/QName.h b/XML/include/Poco/XML/QName.h new file mode 100644 index 000000000..881957c3a --- /dev/null +++ b/XML/include/Poco/XML/QName.h @@ -0,0 +1,122 @@ +// +// QName.h +// +// $Id$ +// +// Library: XML +// Package: XML +// Module: QName +// +// Definition of the QName class. +// +// Copyright (c) 2004-2015, Applied Informatics Software Engineering GmbH. +// and Contributors. +// +// SPDX-License-Identifier: BSL-1.0 +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef POCO_XML_QNAME_HXX +#define POCO_XML_QNAME_HXX + +#include "Poco/XML/XML.h" + +#include +#include + + +namespace Poco +{ +namespace XML +{ + +// Note that the optional prefix is just a "syntactic sugar". In +// particular, it is ignored by the comparison operators and the +// std::ostream insertion operator. +// +class XML_API QName +{ +public: + QName(); + QName(const std::string& name); + QName(const std::string& ns, const std::string& name); + QName(const std::string& ns, const std::string& name, const std::string& prefix); + + const std::string& namespace_() const; + const std::string& name() const; + const std::string& prefix() const; + std::string& namespace_(); + std::string& name(); + std::string& prefix(); + + // Printable representation in the [namespace#]name form (the "namespace#" part is omitted when the namespace is empty).
+ // + std::string string() const; + + // Note that comparison operators only look at namespace and name; the prefix is ignored. + // +public: + friend bool operator<(const QName& x, const QName& y) + { + return x._ns < y._ns || (x._ns == y._ns && x._name < y._name); // order by namespace first, then by local name + } + + friend bool operator==(const QName& x, const QName& y) + { + return x._ns == y._ns && x._name == y._name; + } + + friend bool operator!=(const QName& x, const QName& y) + { + return !(x == y); + } + +private: + std::string _ns; + std::string _name; + std::string _prefix; +}; + + +inline const std::string& QName::namespace_() const +{ + return _ns; +} + + +inline const std::string& QName::name() const +{ + return _name; +} + + +inline const std::string& QName::prefix() const +{ + return _prefix; +} + + +inline std::string& QName::namespace_() +{ + return _ns; +} + + +inline std::string& QName::name() +{ + return _name; +} + + +inline std::string& QName::prefix() +{ + return _prefix; +} + + +XML_API std::ostream& operator<<(std::ostream&, const QName&); + +} +} + +#endif // POCO_XML_QNAME_HXX diff --git a/XML/include/Poco/XML/ValueTraits.h b/XML/include/Poco/XML/ValueTraits.h new file mode 100644 index 000000000..1bdd6ce61 --- /dev/null +++ b/XML/include/Poco/XML/ValueTraits.h @@ -0,0 +1,105 @@ +// +// ValueTraits.h +// +// $Id$ +// +// Library: XML +// Package: XML +// Module: ValueTraits +// +// Definition of the ValueTraits templates. +// +// Copyright (c) 2004-2015, Applied Informatics Software Engineering GmbH. +// and Contributors.
+// +// SPDX-License-Identifier: BSL-1.0 +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + + +#ifndef POCO_XML_VALUE_TRAITS_HXX +#define POCO_XML_VALUE_TRAITS_HXX + + +#include "Poco/XML/XMLStreamParserException.h" +#include +#include // std::size_t +#include +#include + +namespace Poco +{ +namespace XML +{ + + +class XMLStreamParser; +class XMLStreamSerializer; + + +template +struct default_value_traits +{ + static T + parse(std::string, const XMLStreamParser&); + + static std::string + serialize(const T&, const XMLStreamSerializer&); +}; + + +template<> +struct XML_API default_value_traits +{ + static bool + parse(std::string, const XMLStreamParser&); + + static std::string serialize(bool v, const XMLStreamSerializer&) + { + return v ? "true" : "false"; + } +}; + + +template<> +struct XML_API default_value_traits +{ + static std::string parse(std::string s, const XMLStreamParser&) + { + return s; // strings are passed through unchanged + } + + static std::string serialize(const std::string& v, const XMLStreamSerializer&) + { + return v; + } +}; + + +template +struct ValueTraits: default_value_traits +{ +}; + + +template +struct ValueTraits : default_value_traits +{ +}; + + +template +T default_value_traits::parse(std::string s, const XMLStreamParser& p) +{ + T r; + std::istringstream is(s); + if (!(is >> r && is.eof())) // extraction must succeed and consume the whole string + throw XMLStreamParserException(p, "invalid value '" + s + "'"); + return r; +} + + +} +} + +#endif // POCO_XML_VALUE_TRAITS_HXX diff --git a/XML/include/Poco/XML/XMLStreamParser.h b/XML/include/Poco/XML/XMLStreamParser.h new file mode 100644 index 000000000..3b6330487 --- /dev/null +++ b/XML/include/Poco/XML/XMLStreamParser.h @@ -0,0 +1,636 @@ +// +// XMLStreamParser.h +// +// $Id$ +// +// Library: XML +// Package: XML +// Module: XMLStreamParser +// +// Definition of the XMLStreamParser class. +// +// Copyright (c) 2004-2015, Applied Informatics Software Engineering GmbH. +// and Contributors.
+// +// SPDX-License-Identifier: BSL-1.0 +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file +// + + +#ifndef POCO_XML_XMLSTREAMPARSER_INCLUDED +#define POCO_XML_XMLSTREAMPARSER_INCLUDED + + +// We only support UTF-8 expat. +// +#ifdef XML_UNICODE +# error UTF-16 expat (XML_UNICODE defined) is not supported +#endif + +#include "Poco/XML/QName.h" +#include "Poco/XML/ValueTraits.h" +#include "Poco/XML/Content.h" +#include +#include +#include +#include +#include +#include // std::size_t +#include + + +namespace Poco +{ +namespace XML +{ + + +class XML_API XMLStreamParser + /// The streaming XML pull parser and streaming XML serializer. The parser + /// is a conforming, non-validating XML 1.0 implementation (see Implementation Notes + /// for details). The application character encoding (that is, the encoding used + /// in the application's memory) for both parser and serializer is UTF-8. + /// The output encoding of the serializer is UTF-8 as well. The parser supports + /// UTF-8, UTF-16, ISO-8859-1, and US-ASCII input encodings. + /// + /// Attribute map: + /// + /// Attribute map lookup. If attribute is not found, then the version + /// without the default value throws an appropriate parsing exception + /// while the version with the default value returns that value. + /// + /// Note also that there is no attribute(ns,name) version since it + /// would conflict with attribute(name,dv) (qualified attributes + /// are not very common). + /// + /// Attribute map is valid throughout at the "element level" until + /// EndElement and not just during StartElement. As a special case, + /// the map is still valid after peek() that returned EndElement until + /// this EndElement event is retrieved with next().
+ /// + /// For more information see: http://www.codesynthesis.com/projects/libstudxml/doc/intro.xhtml + /// + /// Using parser: + /// @code + /// XMLStreamParser p (ifs, argv[1]); + /// for (XMLStreamParser::EventType e: p) + /// { + /// switch (e) + /// { + /// case XMLStreamParser::StartElement: + /// cerr << p.line () << ':' << p.column () << ": start " << p.name () << endl; break; + /// case XMLStreamParser::EndElement: + /// cerr << p.line () << ':' << p.column () << ": end " << p.name () << endl; break; + /// case XMLStreamParser::StartAttribute: + /// ... + /// case XMLStreamParser::EndAttribute: + /// ... + /// case XMLStreamParser::Characters: + /// ... + /// } + /// } + /// @endcode + /// +{ +public: + + /// Parsing events. + enum EventType + { + // If adding new events, also update the stream insertion operator. + StartElement, + EndElement, + StartAttribute, + EndAttribute, + Characters, + StartNamespaceDecl, + EndNamespaceDecl, + Eof + }; + + typedef unsigned short FeatureType; + + // If both RECEIVE_ATTRIBUTES_EVENT and RECEIVE_ATTRIBUTE_MAP are + // specified, then RECEIVE_ATTRIBUTES_EVENT is assumed. + // + static const FeatureType RECEIVE_ELEMENTS = 0x0001; + static const FeatureType RECEIVE_CHARACTERS = 0x0002; + static const FeatureType RECEIVE_ATTRIBUTE_MAP = 0x0004; + static const FeatureType RECEIVE_ATTRIBUTES_EVENT = 0x0008; + static const FeatureType RECEIVE_NAMESPACE_DECLS = 0x0010; + static const FeatureType RECEIVE_DEFAULT = RECEIVE_ELEMENTS | RECEIVE_CHARACTERS | RECEIVE_ATTRIBUTE_MAP; + + // Low-level attribute map access. Note that this API assumes + // all attributes are handled. + // + struct AttributeValueType + { + std::string value; + mutable bool handled; + }; + + typedef std::map AttributeMapType; + + // C++11 range-based for support. Generally, the iterator interface + // doesn't make much sense for the XMLStreamParser so for now we have an + // implementation that is just enough for the range-based for.
+ // + struct Iterator + { + typedef EventType value_type; + + Iterator(XMLStreamParser* p = 0, EventType e = Eof) : + p_(p), + e_(e) + { + } + value_type operator*() const + { + return e_; + } + Iterator& operator++() + { + e_ = p_->next(); + return *this; + } + + // Comparison only makes sense when comparing to end (Eof). + // + bool operator==(Iterator y) const + { + return e_ == Eof && y.e_ == Eof; + } + bool operator!=(Iterator y) const + { + return !(*this == y); + } + + private: + XMLStreamParser* p_; + EventType e_; + }; + + Iterator begin() + { + return Iterator(this, next()); + } + Iterator end() + { + return Iterator(this, Eof); + } + + + XMLStreamParser(std::istream&, const std::string& input_name, FeatureType = RECEIVE_DEFAULT); + /// The parser constructor takes three arguments: the stream to parse, + /// input name that is used in diagnostics to identify the document being + /// parsed, and the list of events we want the parser to report. + /// + /// Parse std::istream. Input name is used in diagnostics to identify + /// the document being parsed. + /// + /// If stream exceptions are enabled then std::ios_base::failure + /// exception is used to report io errors (badbit and failbit). + /// Otherwise, those are reported as the parsing exception. + + XMLStreamParser(const void* data, std::size_t size, const std::string& input_name, FeatureType = RECEIVE_DEFAULT); + /// Parse memory buffer that contains the whole document. Input name + /// is used in diagnostics to identify the document being parsed. + + ~XMLStreamParser(); + + EventType next(); + /// Call the next() function when we are ready to handle the next piece of XML. + + void nextExpect(EventType); + /// Get the next event and make sure that it's what's expected. If it + /// is not, then throw an appropriate parsing exception.
+ + void nextExpect(EventType, const std::string& name); + void nextExpect(EventType, const QName& qname); + void nextExpect(EventType, const std::string& ns, const std::string& name); + + EventType peek(); + EventType event(); + // Return the event that was last returned by the call to next() or peek(). + + const std::string& inputName() const; + const QName& getQName() const; + const std::string& namespace_() const; + const std::string& name() const; + const std::string& prefix() const; + std::string& value(); + const std::string& value() const; + template T value() const; + Poco::UInt64 line() const; + Poco::UInt64 column() const; + const std::string& attribute(const std::string& name) const; + template + T attribute(const std::string& name) const; + std::string attribute(const std::string& name, const std::string& default_value) const; + template + T attribute(const std::string& name, const T& default_value) const; + const std::string& attribute(const QName& qname) const; + template + T attribute(const QName& qname) const; + std::string attribute(const QName& qname, const std::string& default_value) const; + template + T attribute(const QName& qname, const T& default_value) const; + bool attributePresent(const std::string& name) const; + bool attributePresent(const QName& qname) const; + + const AttributeMapType& attributeMap() const; + + // Note that you cannot get/set content while peeking. + // + void content(Content); + Content content() const; + + // Versions that also set the content. Event type must be StartElement. + // + void nextExpect(EventType, const std::string& name, Content); + void nextExpect(EventType, const QName& qname, Content); + void nextExpect(EventType, const std::string& ns, const std::string& name, Content); + + // Helpers for parsing elements with simple content. The first two + // functions assume that StartElement has already been parsed. The + // rest parse the complete element, from start to end.
+ // + // Note also that as with attribute(), there is no (namespace,name) + // overload since it would conflict with (namespace,default_value). + // + std::string element(); + + template + T element(); + std::string element(const std::string& name); + std::string element(const QName& qname); + template + T element(const std::string& name); + template + T element(const QName& qname); + std::string element(const std::string& name, const std::string& default_value); + std::string element(const QName& qname, const std::string& default_value); + template + T element(const std::string& name, const T& default_value); + template + T element(const QName& qname, const T& default_value); + +private: + XMLStreamParser(const XMLStreamParser&); + XMLStreamParser& operator=(const XMLStreamParser&); + + static void XMLCALL start_element_(void*, const XML_Char*, const XML_Char**); + static void XMLCALL end_element_(void*, const XML_Char*); + static void XMLCALL characters_(void*, const XML_Char*, int); + static void XMLCALL start_namespace_decl_(void*, const XML_Char*, const XML_Char*); + static void XMLCALL end_namespace_decl_(void*, const XML_Char*); + + void init(); + EventType next_(bool peek); + EventType next_body(); + void handle_error(); + + // If _size is 0, then _data is std::istream. Otherwise, it is a buffer. + // + union + { + std::istream* is; + const void* buf; + } + _data; + + std::size_t _size; + + const std::string _inputName; + FeatureType feature_; + + XML_Parser p_; + std::size_t depth_; + bool _accumulateContent; // Whether we are accumulating character content. + enum + { + state_next, state_peek + } + _parserState; + EventType _currentEvent; + EventType queue_; + + QName qname_; + std::string value_; + + // These are used to avoid copying when we are handling attributes + // and namespace decls. + // + const QName* _qualifiedName; + std::string* pvalue_; + + Poco::UInt64 _line; + Poco::UInt64 _column; + + // Attributes as events.
+ // + struct attribute_type + { + QName qname; + std::string value; + }; + + typedef std::vector attributes; + + attributes _attributes; + attributes::size_type _currentAttributeIndex; // Index of the current attribute. + + // Namespace declarations. + // + typedef std::vector namespace_decls; + + namespace_decls start_ns_; + namespace_decls::size_type start_ns_i_;// Index of the current decl. + + namespace_decls end_ns_; + namespace_decls::size_type end_ns_i_;// Index of the current decl. + + // Element state consisting of the content model and attribute map. + // + struct ElementEntry + { + ElementEntry(std::size_t d, Content c = Content::Mixed) : + depth(d), + content(c), + attributesUnhandled_(0) + { + } + + std::size_t depth; + Content content; + AttributeMapType attr_map_; + mutable AttributeMapType::size_type attributesUnhandled_; + }; + + typedef std::vector ElementState; + std::vector _elementState; + + // Empty attribute map to return when an element has no attributes. + // + const AttributeMapType _emptyAttrMap; + + // Return the element entry corresponding to the current depth, if + // exists, and NULL otherwise. + // + const ElementEntry* getElement() const; + + const ElementEntry* get_element_() const; + + void popElement(); +}; + +XML_API std::ostream& operator<<(std::ostream&, XMLStreamParser::EventType); + +inline XMLStreamParser::EventType XMLStreamParser::event() + // Return the event that was last returned by the call to next() or peek().
+{ + return _currentEvent; +} + +inline const std::string& XMLStreamParser::inputName() const +{ + return _inputName; +} + +inline const QName& XMLStreamParser::getQName() const +{ + return *_qualifiedName; +} + +inline const std::string& XMLStreamParser::namespace_() const +{ + return _qualifiedName->namespace_(); +} + +inline const std::string& XMLStreamParser::name() const +{ + return _qualifiedName->name(); +} + +inline const std::string& XMLStreamParser::prefix() const +{ + return _qualifiedName->prefix(); +} + +inline std::string& XMLStreamParser::value() +{ + return *pvalue_; +} + +inline const std::string& XMLStreamParser::value() const +{ + return *pvalue_; +} + + +inline Poco::UInt64 XMLStreamParser::line() const +{ + return _line; +} + +inline Poco::UInt64 XMLStreamParser::column() const +{ + return _column; +} + +inline XMLStreamParser::EventType XMLStreamParser::peek() +{ + if (_parserState == state_peek) + return _currentEvent; + else + { + EventType e(next_(true)); + _parserState = state_peek; // Set it after the call to next_().
+ return e; + } +} + +template +inline T XMLStreamParser::value() const +{ + return ValueTraits < T > ::parse(value(), *this); +} + +inline const std::string& XMLStreamParser::attribute(const std::string& n) const +{ + return attribute(QName(n)); +} + +template +inline T XMLStreamParser::attribute(const std::string& n) const +{ + return attribute < T > (QName(n)); +} + +inline std::string XMLStreamParser::attribute(const std::string& n, const std::string& dv) const +{ + return attribute(QName(n), dv); +} + +template +inline T XMLStreamParser::attribute(const std::string& n, const T& dv) const +{ + return attribute < T > (QName(n), dv); +} + +template +inline T XMLStreamParser::attribute(const QName& qn) const +{ + return ValueTraits < T > ::parse(attribute(qn), *this); +} + +inline bool XMLStreamParser::attributePresent(const std::string& n) const +{ + return attributePresent(QName(n)); +} + +inline const XMLStreamParser::AttributeMapType& XMLStreamParser::attributeMap() const +{ + if (const ElementEntry* e = getElement()) + { + e->attributesUnhandled_ = 0; // Assume all handled.
+ return e->attr_map_; + } + + return _emptyAttrMap; +} + +inline void XMLStreamParser::nextExpect(EventType e, const QName& qn) +{ + nextExpect(e, qn.namespace_(), qn.name()); +} + +inline void XMLStreamParser::nextExpect(EventType e, const std::string& n) +{ + nextExpect(e, std::string(), n); +} + +inline void XMLStreamParser::nextExpect(EventType e, const QName& qn, Content c) +{ + nextExpect(e, qn); + assert(e == StartElement); + content(c); +} + +inline void XMLStreamParser::nextExpect(EventType e, const std::string& n, Content c) +{ + nextExpect(e, std::string(), n); + assert(e == StartElement); + content(c); +} + +inline void XMLStreamParser::nextExpect(EventType e, const std::string& ns, const std::string& n, Content c) +{ + nextExpect(e, ns, n); + assert(e == StartElement); + content(c); +} + +template +inline T XMLStreamParser::element() +{ + return ValueTraits < T > ::parse(element(), *this); +} + +inline std::string XMLStreamParser::element(const std::string& n) +{ + nextExpect(StartElement, n); + return element(); +} + +inline std::string XMLStreamParser::element(const QName& qn) +{ + nextExpect(StartElement, qn); + return element(); +} + +template +inline T XMLStreamParser::element(const std::string& n) +{ + return ValueTraits < T > ::parse(element(n), *this); +} + +template +inline T XMLStreamParser::element(const QName& qn) +{ + return ValueTraits < T > ::parse(element(qn), *this); +} + +inline std::string XMLStreamParser::element(const std::string& n, const std::string& dv) +{ + return element(QName(n), dv); +} + +template +inline T XMLStreamParser::element(const std::string& n, const T& dv) +{ + return element < T > (QName(n), dv); +} + +inline void XMLStreamParser::content(Content c) +{ + assert(_parserState == state_next); + + if (!_elementState.empty() && _elementState.back().depth == depth_) + _elementState.back().content = c; + else + _elementState.push_back(ElementEntry(depth_, c)); +} + +inline Content XMLStreamParser::content() const +{ +
assert(_parserState == state_next); + + return !_elementState.empty() && _elementState.back().depth == depth_ ? _elementState.back().content : Content(Content::Mixed); +} + +inline const XMLStreamParser::ElementEntry* XMLStreamParser::getElement() const +{ + return _elementState.empty() ? 0 : get_element_(); +} + +template +T XMLStreamParser::attribute(const QName& qn, const T& dv) const +{ + if (const ElementEntry* e = getElement()) + { + AttributeMapType::const_iterator i(e->attr_map_.find(qn)); + + if (i != e->attr_map_.end()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attributesUnhandled_--; + } + return ValueTraits < T > ::parse(i->second.value, *this); + } + } + + return dv; +} + +template +T XMLStreamParser::element(const QName& qn, const T& dv) +{ + if (peek() == StartElement && getQName() == qn) + { + next(); + return element(); + } + + return dv; +} + + +} +} + + +#endif // POCO_XML_XMLSTREAMPARSER_INCLUDED diff --git a/XML/include/Poco/XML/XMLStreamParserException.h b/XML/include/Poco/XML/XMLStreamParserException.h new file mode 100644 index 000000000..e2cfd460b --- /dev/null +++ b/XML/include/Poco/XML/XMLStreamParserException.h @@ -0,0 +1,58 @@ +// +// XMLStreamParserException.h +// +// $Id$ +// +// Library: XML +// Package: XML +// Module: XMLStreamParserException +// +// Definition of the XMLStreamParserException class. +// +// Copyright (c) 2004-2015, Applied Informatics Software Engineering GmbH. +// and Contributors.
+// +// SPDX-License-Identifier: BSL-1.0 +// copyright : Copyright (c) 2013-2014 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + + +#ifndef POCO_XML_XMLSTREAMPARSEREXCEPTION_H_ +#define POCO_XML_XMLSTREAMPARSEREXCEPTION_H_ + +#include + +namespace Poco +{ +namespace XML +{ +class XMLStreamParser; + +class XML_API XMLStreamParserException : +public Poco::XML::XMLException +{ +public: + XMLStreamParserException(const std::string& name, Poco::UInt64 line, Poco::UInt64 column, const std::string& description); + XMLStreamParserException(const XMLStreamParser&, const std::string& description); + virtual ~XMLStreamParserException() throw (); + + const char* name() const throw(); + Poco::UInt64 line() const; + Poco::UInt64 column() const; + const std::string& description() const; + virtual const char* what() const throw (); + +private: + void init(); + + std::string _name; + Poco::UInt64 _line; + Poco::UInt64 _column; + std::string _description; + std::string _what; +}; + +} +/* namespace XML */ +} /* namespace Poco */ +#endif /* POCO_XML_XMLSTREAMPARSEREXCEPTION_H_ */ diff --git a/XML/samples/CMakeLists.txt b/XML/samples/CMakeLists.txt index 659591ff6..7df6ecf42 100644 --- a/XML/samples/CMakeLists.txt +++ b/XML/samples/CMakeLists.txt @@ -2,4 +2,5 @@ add_subdirectory(DOMParser) add_subdirectory(DOMWriter) add_subdirectory(PrettyPrint) add_subdirectory(SAXParser) +#add_subdirectory(RoundTrip) diff --git a/XML/samples/RoundTrip/CMakeLists.txt b/XML/samples/RoundTrip/CMakeLists.txt new file mode 100644 index 000000000..2da54f203 --- /dev/null +++ b/XML/samples/RoundTrip/CMakeLists.txt @@ -0,0 +1,7 @@ +set(SAMPLE_NAME "RoundTrip") + +set(LOCAL_SRCS "") +aux_source_directory(src LOCAL_SRCS) + +add_executable( ${SAMPLE_NAME} ${LOCAL_SRCS} ) +target_link_libraries( ${SAMPLE_NAME} PocoXML PocoFoundation ) diff --git a/XML/samples/RoundTrip/Makefile b/XML/samples/RoundTrip/Makefile new file mode 100644 index 000000000..a85b3944a --- /dev/null +++ 
b/XML/samples/RoundTrip/Makefile @@ -0,0 +1,21 @@ +# +# Makefile +# +# $Id: //poco/1.4/XML/samples/SAXParser/Makefile#1 $ +# +# Makefile for Poco RoundTrip sample +# + +include $(POCO_BASE)/build/rules/global + +objects = RoundTrip + +target = RoundTrip +target_version = 1 +target_libs = PocoXML PocoFoundation + +include $(POCO_BASE)/build/rules/exec + +ifdef POCO_UNBUNDLED + SYSLIBS += -lexpat +endif diff --git a/XML/samples/RoundTrip/src/RoundTrip.cpp b/XML/samples/RoundTrip/src/RoundTrip.cpp new file mode 100644 index 000000000..3b136c908 --- /dev/null +++ b/XML/samples/RoundTrip/src/RoundTrip.cpp @@ -0,0 +1,100 @@ +// file : examples/roundtrip/driver.cxx +// copyright : not copyrighted - public domain + +#include +#include +#include + +#include "Poco/XML/XMLStreamParser.h" + +using namespace std; +using namespace Poco::XML; + +int main(int argc, char* argv[]) +{ + if (argc != 2) + { + cerr << "usage: " << argv[0] << " " << endl; + return 1; + } + + try + { + // Enable stream exceptions so that io failures are reported + // as stream rather than as parsing exceptions. + // + ifstream ifs; + ifs.exceptions(ifstream::badbit | ifstream::failbit); + ifs.open(argv[1], ifstream::in | ifstream::binary); + + // Configure the parser to receive attributes as events as well + // as to receive prefix-namespace mappings (namespace declarations + // in XML terminology). + // + XMLStreamParser p(ifs, argv[1], + XMLStreamParser::RECEIVE_DEFAULT | XMLStreamParser::RECEIVE_ATTRIBUTES_EVENT | XMLStreamParser::RECEIVE_NAMESPACE_DECLS); + + // Configure serializer not to perform indentation. Existing + // indentation, if any, will be preserved.
+ // + XMLStreamSerializer s(cout, "out", 0); + + for (XMLStreamParser::EventType e(p.next()); e != XMLStreamParser::Eof; e = p.next()) + { + switch (e) + { + case XMLStreamParser::StartElement: + { + s.startElement(p.getQName()); + break; + } + case XMLStreamParser::EndElement: + { + s.endElement(); + break; + } + case XMLStreamParser::StartNamespaceDecl: + { + s.namespaceDecl(p.namespace_(), p.prefix()); + break; + } + case XMLStreamParser::EndNamespaceDecl: + { + // There is nothing in XML that indicates the end of namespace + // declaration since it is scope-based. + // + break; + } + case XMLStreamParser::StartAttribute: + { + s.startAttribute(p.getQName()); + break; + } + case XMLStreamParser::EndAttribute: + { + s.endAttribute(); + break; + } + case XMLStreamParser::Characters: + { + s.characters(p.value()); + break; + } + case XMLStreamParser::Eof: + { + // Handled in the for loop. + // + break; + } + } + } + } catch (const ios_base::failure&) + { + cerr << "io failure" << endl; + return 1; + } catch (const Poco::Exception& e) + { + cerr << e.displayText() << endl; + return 1; + } +} diff --git a/XML/src/QName.cpp b/XML/src/QName.cpp new file mode 100644 index 000000000..5cbd5da3f --- /dev/null +++ b/XML/src/QName.cpp @@ -0,0 +1,74 @@ +// +// QName.cpp +// +// $Id$ +// +// Library: XML +// Package: XML +// Module: QName +// +// Implementation of the QName class. +// +// Copyright (c) 2004-2015, Applied Informatics Software Engineering GmbH. +// and Contributors.
+// +// SPDX-License-Identifier: BSL-1.0 +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include "Poco/XML/QName.h" +#include + +using namespace std; + +namespace Poco +{ +namespace XML +{ + +QName::QName() +{ +} + + +QName::QName(const std::string& name) : + _name(name) +{ +} + + +QName::QName(const std::string& ns, const std::string& name) : + _ns(ns), + _name(name) +{ +} + + +QName::QName(const std::string& ns, const std::string& name, const std::string& prefix) : + _ns(ns), + _name(name), + _prefix(prefix) +{ +} + + +string QName::string() const +{ + std::string r; + if (!_ns.empty()) // the "namespace#" part is only emitted for a non-empty namespace + { + r += _ns; + r += '#'; + } + + r += _name; + return r; +} + +ostream& operator<<(ostream& os, const QName& qn) +{ + return os << qn.string(); +} + +} +} diff --git a/XML/src/ValueTraits.cpp b/XML/src/ValueTraits.cpp new file mode 100644 index 000000000..f11c33b5a --- /dev/null +++ b/XML/src/ValueTraits.cpp @@ -0,0 +1,45 @@ +// +// ValueTraits.cpp +// +// $Id$ +// +// Library: XML +// Package: XML +// Module: ValueTraits +// +// Implementation of the ValueTraits templates. +// +// Copyright (c) 2004-2015, Applied Informatics Software Engineering GmbH. +// and Contributors.
+// +// SPDX-License-Identifier: BSL-1.0 +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + + +#include "Poco/XML/XMLStreamParser.h" +#include "Poco/XML/XMLStreamParserException.h" + + +using namespace std; + + +namespace Poco +{ +namespace XML +{ + + +bool default_value_traits::parse(string s, const XMLStreamParser& p) +{ + if (s == "true" || s == "1" || s == "True" || s == "TRUE") + return true; + else if (s == "false" || s == "0" || s == "False" || s == "FALSE") + return false; + else + throw XMLStreamParserException(p, "invalid bool value '" + s + "'"); +} + + +} +} diff --git a/XML/src/XMLStreamParser.cpp b/XML/src/XMLStreamParser.cpp new file mode 100644 index 000000000..8e5b40978 --- /dev/null +++ b/XML/src/XMLStreamParser.cpp @@ -0,0 +1,943 @@ +// +// XMLStreamParser.cpp +// +// $Id$ +// +// Library: XML +// Package: XML +// Module: XMLStreamParser +// +// Implementation of the XMLStreamParser class. +// +// Copyright (c) 2004-2015, Applied Informatics Software Engineering GmbH. +// and Contributors. +// +// SPDX-License-Identifier: BSL-1.0 +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include "Poco/XML/XMLStreamParser.h" +#include // std::bad_alloc +#include +#include // std::strchr +#include +#include +#include + +using namespace std; + +namespace Poco +{ +namespace XML +{ + +struct stream_exception_controller +{ + ~stream_exception_controller() + { + istream::iostate s = is_.rdstate(); + s &= ~istream::failbit; + + // If our error state (sans failbit) intersects with the + // exception state then that means we have an active + // exception and changing error/exception state will + // cause another to be thrown. + // + if (!(old_state_ & s)) + { + // Clear failbit if it was caused by eof.
+ // + if (is_.fail() && is_.eof()) + is_.clear(s); + + is_.exceptions(old_state_); + } + } + + stream_exception_controller(istream& is) + : is_(is), old_state_(is_.exceptions()) + { + is_.exceptions(old_state_ & ~istream::failbit); + } + +private: + stream_exception_controller(const stream_exception_controller&); + + stream_exception_controller& + operator=(const stream_exception_controller&); + +private: + istream& is_; + istream::iostate old_state_; +}; + + +static const char* parser_event_str[] = +{ + "start element", + "end element", + "start attribute", + "end attribute", + "characters", + "start namespace declaration", + "end namespace declaration", + "end of file" +}; + + +ostream& operator<<(ostream& os, XMLStreamParser::EventType e) +{ + return os << parser_event_str[e]; +} + + +XMLStreamParser::XMLStreamParser(std::istream& is, const std::string& iname, FeatureType f) + : _size(0), _inputName(iname), feature_(f) +{ + _data.is = &is; + init(); +} + + +XMLStreamParser::XMLStreamParser(const void* data, std::size_t size, const std::string& iname, FeatureType f) + : _size(size), _inputName(iname), feature_(f) +{ + assert(data != 0 && size != 0); + + _data.buf = data; + init(); +} + + +XMLStreamParser::~XMLStreamParser() +{ + if (p_ != 0) + XML_ParserFree(p_); +} + + +void XMLStreamParser::init() +{ + depth_ = 0; + _parserState = state_next; + _currentEvent = Eof; + queue_ = Eof; + + _qualifiedName = &qname_; + pvalue_ = &value_; + + _line = 0; + _column = 0; + + _currentAttributeIndex = 0; + start_ns_i_ = 0; + end_ns_i_ = 0; + + if ((feature_ & RECEIVE_ATTRIBUTE_MAP) != 0 && (feature_ & RECEIVE_ATTRIBUTES_EVENT) != 0) + feature_ &= ~RECEIVE_ATTRIBUTE_MAP; + + // Allocate the expat parser. Make sure nothing else can throw after + // this call since otherwise we will leak it. + // + p_ = XML_ParserCreateNS(0, XML_Char(' ')); + + if (p_ == 0) + throw bad_alloc(); + + // Get prefixes in addition to namespaces and local names.
+ // + XML_SetReturnNSTriplet(p_, true); + + // Set handlers. + // + XML_SetUserData(p_, this); + + if ((feature_ & RECEIVE_ELEMENTS) != 0) + { + XML_SetStartElementHandler(p_, &start_element_); + XML_SetEndElementHandler(p_, &end_element_); + } + + if ((feature_ & RECEIVE_CHARACTERS) != 0) + XML_SetCharacterDataHandler(p_, &characters_); + + if ((feature_ & RECEIVE_NAMESPACE_DECLS) != 0) + XML_SetNamespaceDeclHandler(p_, &start_namespace_decl_, &end_namespace_decl_); +} + + +void XMLStreamParser::handle_error() +{ + XML_Error e(XML_GetErrorCode(p_)); + + if (e == XML_ERROR_ABORTED) + { + // For now we only abort the expat parser in the characters_() and + // start_element_() handlers. + // + switch (content()) + { + case Content::Empty: + throw XMLStreamParserException(*this, "characters in empty content"); + case Content::Simple: + throw XMLStreamParserException(*this, "element in simple content"); + case Content::Complex: + throw XMLStreamParserException(*this, "characters in complex content"); + default: + assert(false); + } + } + else + throw XMLStreamParserException(_inputName, XML_GetCurrentLineNumber(p_), XML_GetCurrentColumnNumber(p_), XML_ErrorString(e)); +} + + +XMLStreamParser::EventType XMLStreamParser::next() +{ + if (_parserState == state_next) + return next_(false); + else + { + // If we previously peeked at start/end_element, then adjust + // state accordingly.
+ // + switch (_currentEvent) + { + case EndElement: + { + if (!_elementState.empty() && _elementState.back().depth == depth_) + popElement(); + + depth_--; + break; + } + case StartElement: + { + depth_++; + break; + } + default: + break; + } + + _parserState = state_next; + return _currentEvent; + } +} + + +const string& XMLStreamParser::attribute(const QName& qn) const +{ + if (const ElementEntry* e = getElement()) + { + AttributeMapType::const_iterator i(e->attr_map_.find(qn)); + + if (i != e->attr_map_.end()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attributesUnhandled_--; + } + return i->second.value; + } + } + + throw XMLStreamParserException(*this, "attribute '" + qn.string() + "' expected"); +} + + +string XMLStreamParser::attribute(const QName& qn, const string& dv) const +{ + if (const ElementEntry* e = getElement()) + { + AttributeMapType::const_iterator i(e->attr_map_.find(qn)); + + if (i != e->attr_map_.end()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attributesUnhandled_--; + } + return i->second.value; + } + } + + return dv; +} + + +bool XMLStreamParser::attributePresent(const QName& qn) const +{ + if (const ElementEntry* e = getElement()) + { + AttributeMapType::const_iterator i(e->attr_map_.find(qn)); + + if (i != e->attr_map_.end()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attributesUnhandled_--; + } + return true; + } + } + + return false; +} + + +void XMLStreamParser::nextExpect(EventType e) +{ + if (next() != e) + throw XMLStreamParserException(*this, string(parser_event_str[e]) + " expected"); +} + + +void XMLStreamParser::nextExpect(EventType e, const string& ns, const string& n) +{ + if (next() != e || namespace_() != ns || name() != n) + throw XMLStreamParserException(*this, string(parser_event_str[e]) + " '" + QName(ns, n).string() + "' expected"); +} + + +string XMLStreamParser::element() +{ + content(Content::Simple); + string r; + + // The content of the
element can be empty in which case there + // will be no characters event. + // + EventType e(next()); + if (e == Characters) + { + r.swap(value()); + e = next(); + } + + // We cannot really get anything other than end_element since + // the simple content validation won't allow it. + // + assert(e == EndElement); + + return r; +} + + +string XMLStreamParser::element(const QName& qn, const string& dv) +{ + if (peek() == StartElement && getQName() == qn) + { + next(); + return element(); + } + + return dv; +} + + +const XMLStreamParser::ElementEntry* XMLStreamParser::get_element_() const +{ + // The start_element_() Expat handler may have already provisioned + // an entry in the element stack. In this case, we need to get the + // one before it, if any. + // + const ElementEntry* r(0); + ElementState::size_type n(_elementState.size() - 1); + + if (_elementState[n].depth == depth_) + r = &_elementState[n]; + else if (n != 0 && _elementState[n].depth > depth_) + { + n--; + if (_elementState[n].depth == depth_) + r = &_elementState[n]; + } + + return r; +} + + +void XMLStreamParser::popElement() +{ + // Make sure there are no unhandled attributes left. + // + const ElementEntry& e(_elementState.back()); + if (e.attributesUnhandled_ != 0) + { + // Find the first unhandled attribute and report it. + // + for (AttributeMapType::const_iterator i(e.attr_map_.begin()); i != e.attr_map_.end(); ++i) + { + if (!i->second.handled) + throw XMLStreamParserException(*this, "unexpected attribute '" + i->first.string() + "'"); + } + assert(false); + } + + _elementState.pop_back(); +} + + +XMLStreamParser::EventType XMLStreamParser::next_(bool peek) +{ + EventType e(next_body()); + + // Content-specific processing. Note that we handle characters in the + // characters_() Expat handler for two reasons. Firstly, it is faster + // to ignore the whitespaces at the source. Secondly, this allows us + // to distinguish between element and attribute characters. 
We can + // move this processing to the handler because the characters event + // is never queued. + // + switch (e) + { + case EndElement: + { + // If this is a peek, then avoid popping the stack just yet. + // This way, the attribute map will still be valid until we + // call next(). + // + if (!peek) + { + if (!_elementState.empty() && _elementState.back().depth == depth_) + popElement(); + + depth_--; + } + break; + } + case StartElement: + { + if (const ElementEntry* e = getElement()) + { + switch (e->content) + { + case Content::Empty: + throw XMLStreamParserException(*this, "element in empty content"); + case Content::Simple: + throw XMLStreamParserException(*this, "element in simple content"); + default: + break; + } + } + + // If this is a peek, then delay adjusting the depth. + // + if (!peek) + depth_++; + + break; + } + default: + break; + } + + return e; +} + + +XMLStreamParser::EventType XMLStreamParser::next_body() +{ + // See if we have any start namespace declarations we need to return. + // + if (start_ns_i_ < start_ns_.size()) + { + // Based on the previous event determine what's the next one must be. + // + switch (_currentEvent) + { + case StartNamespaceDecl: + { + if (++start_ns_i_ == start_ns_.size()) + { + start_ns_i_ = 0; + start_ns_.clear(); + _qualifiedName = &qname_; + break; // No more declarations. + } + // Fall through. + } + case StartElement: + { + _currentEvent = StartNamespaceDecl; + _qualifiedName = &start_ns_[start_ns_i_]; + return _currentEvent; + } + default: + { + assert(false); + return _currentEvent = Eof; + } + } + } + + // See if we have any attributes we need to return as events. + // + if (_currentAttributeIndex < _attributes.size()) + { + // Based on the previous event determine what's the next one must be. 
+ // + switch (_currentEvent) + { + case StartAttribute: + { + _currentEvent = Characters; + pvalue_ = &_attributes[_currentAttributeIndex].value; + return _currentEvent; + } + case Characters: + { + _currentEvent = EndAttribute; // Name is already set. + return _currentEvent; + } + case EndAttribute: + { + if (++_currentAttributeIndex == _attributes.size()) + { + _currentAttributeIndex = 0; + _attributes.clear(); + _qualifiedName = &qname_; + pvalue_ = &value_; + break; // No more attributes. + } + // Fall through. + } + case StartElement: + case StartNamespaceDecl: + { + _currentEvent = StartAttribute; + _qualifiedName = &_attributes[_currentAttributeIndex].qname; + return _currentEvent; + } + default: + { + assert(false); + return _currentEvent = Eof; + } + } + } + + // See if we have any end namespace declarations we need to return. + // + if (end_ns_i_ < end_ns_.size()) + { + // Based on the previous event determine what's the next one must be. + // + switch (_currentEvent) + { + case EndNamespaceDecl: + { + if (++end_ns_i_ == end_ns_.size()) + { + end_ns_i_ = 0; + end_ns_.clear(); + _qualifiedName = &qname_; + break; // No more declarations. + } + // Fall through. + } + // The end namespace declaration comes before the end element + // which means it can follow pretty much any other event. + // + default: + { + _currentEvent = EndNamespaceDecl; + _qualifiedName = &end_ns_[end_ns_i_]; + return _currentEvent; + } + } + } + + // Check the queue. + // + if (queue_ != Eof) + { + _currentEvent = queue_; + queue_ = Eof; + + _line = XML_GetCurrentLineNumber(p_); + _column = XML_GetCurrentColumnNumber(p_); + + return _currentEvent; + } + + // Reset the character accumulation flag. + // + _accumulateContent = false; + + XML_ParsingStatus ps; + XML_GetParsingStatus(p_, &ps); + + switch (ps.parsing) + { + case XML_INITIALIZED: + { + // As if we finished the previous chunk. 
+ break; + } + case XML_PARSING: + { + assert(false); + return _currentEvent = Eof; + } + case XML_FINISHED: + { + return _currentEvent = Eof; + } + case XML_SUSPENDED: + { + switch (XML_ResumeParser(p_)) + { + case XML_STATUS_SUSPENDED: + { + // If the XMLStreamParser is again in the suspended state, then + // that means we have the next event. + // + return _currentEvent; + } + case XML_STATUS_OK: + { + // Otherwise, we need to get and parse the next chunk of data + // unless this was the last chunk, in which case this is eof. + // + if (ps.finalBuffer) + return _currentEvent = Eof; + + break; + } + case XML_STATUS_ERROR: + handle_error(); + } + break; + } + } + + // Get and parse the next chunk of data until we get the next event + // or reach eof. + // + if (!_accumulateContent) + _currentEvent = Eof; + + XML_Status s; + do + { + if (_size != 0) + { + s = XML_Parse(p_, static_cast(_data.buf), static_cast(_size), true); + + if (s == XML_STATUS_ERROR) + handle_error(); + + break; + } + else + { + const size_t cap(4096); + + char* b(static_cast(XML_GetBuffer(p_, cap))); + if (b == 0) + throw bad_alloc(); + + // Temporarily unset the exception failbit. Also clear the fail bit + // when we reset the old state if it was caused by eof. + // + istream& is(*_data.is); + { + stream_exception_controller sec(is); + is.read(b, static_cast(cap)); + } + + // If the caller hasn't configured the stream to use exceptions, + // then use the parsing exception to report an error. 
+ // + if (is.bad() || (is.fail() && !is.eof())) + throw XMLStreamParserException(*this, "io failure"); + + bool eof(is.eof()); + + s = XML_ParseBuffer(p_, static_cast(is.gcount()), eof); + + if (s == XML_STATUS_ERROR) + handle_error(); + + if (eof) + break; + } + } while (s != XML_STATUS_SUSPENDED); + + return _currentEvent; +} + + +static void splitName(const XML_Char* s, QName& qn) +{ + string& ns(qn.namespace_()); + string& name(qn.name()); + string& prefix(qn.prefix()); + + const char* p(strchr(s, ' ')); + + if (p == 0) + { + ns.clear(); + name = s; + prefix.clear(); + } + else + { + ns.assign(s, 0, p - s); + + s = p + 1; + p = strchr(s, ' '); + + if (p == 0) + { + name = s; + prefix.clear(); + } + else + { + name.assign(s, 0, p - s); + prefix = p + 1; + } + } +} + + +void XMLCALL XMLStreamParser::start_element_(void* v, const XML_Char* name, const XML_Char** atts) +{ + XMLStreamParser& p(*static_cast(v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus(p.p_, &ps); + + // Expat has a (mis)-feature of a possibily calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + // Cannot be a followup event. + // + assert(ps.parsing == XML_PARSING); + + // When accumulating characters in simple content, we expect to + // see more characters or end element. Seeing start element is + // possible but means violation of the content model. + // + if (p._accumulateContent) + { + // It would have been easier to throw the exception directly, + // however, the Expat code is most likely not exception safe. + // + p._line = XML_GetCurrentLineNumber(p.p_); + p._column = XML_GetCurrentColumnNumber(p.p_); + XML_StopParser(p.p_, false); + return; + } + + p._currentEvent = StartElement; + splitName(name, p.qname_); + + p._line = XML_GetCurrentLineNumber(p.p_); + p._column = XML_GetCurrentColumnNumber(p.p_); + + // Handle attributes. 
+ // + if (*atts != 0) + { + bool am((p.feature_ & RECEIVE_ATTRIBUTE_MAP) != 0); + bool ae((p.feature_ & RECEIVE_ATTRIBUTES_EVENT) != 0); + + // Provision an entry for this element. + // + ElementEntry* pe(0); + if (am) + { + p._elementState.push_back(ElementEntry(p.depth_ + 1)); + pe = &p._elementState.back(); + } + + if (am || ae) + { + for (; *atts != 0; atts += 2) + { + if (am) + { + QName qn; + splitName(*atts, qn); + AttributeMapType::value_type v(qn, AttributeValueType()); + v.second.value = *(atts + 1); + v.second.handled = false; + pe->attr_map_.insert(v); + } + else + { + p._attributes.push_back(attribute_type()); + splitName(*atts, p._attributes.back().qname); + p._attributes.back().value = *(atts + 1); + } + } + + if (am) + pe->attributesUnhandled_ = pe->attr_map_.size(); + } + } + + XML_StopParser(p.p_, true); +} + + +void XMLCALL XMLStreamParser::end_element_(void* v, const XML_Char* name) +{ + XMLStreamParser& p(*static_cast(v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus(p.p_, &ps); + + // Expat has a (mis)-feature of a possibily calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + // This can be a followup event for empty elements (). In this + // case the element name is already set. + // + if (ps.parsing != XML_PARSING) + p.queue_ = EndElement; + else + { + splitName(name, p.qname_); + + // If we are accumulating characters, then queue this event. 
+ // + if (p._accumulateContent) + p.queue_ = EndElement; + else + { + p._currentEvent = EndElement; + + p._line = XML_GetCurrentLineNumber(p.p_); + p._column = XML_GetCurrentColumnNumber(p.p_); + } + + XML_StopParser(p.p_, true); + } +} + + +void XMLCALL XMLStreamParser::characters_(void* v, const XML_Char* s, int n) +{ + XMLStreamParser& p(*static_cast(v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus(p.p_, &ps); + + // Expat has a (mis)-feature of a possibily calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + Content cont(p.content()); + + // If this is empty or complex content, see if these are whitespaces. + // + switch (cont) + { + case Content::Empty: + case Content::Complex: + { + for (int i(0); i != n; ++i) + { + char c(s[i]); + if (c == 0x20 || c == 0x0A || c == 0x0D || c == 0x09) + continue; + + // It would have been easier to throw the exception directly, + // however, the Expat code is most likely not exception safe. + // + p._line = XML_GetCurrentLineNumber(p.p_); + p._column = XML_GetCurrentColumnNumber(p.p_); + XML_StopParser(p.p_, false); + break; + } + return; + } + default: + break; + } + + // Append the characters if we are accumulating. This can also be a + // followup event for another character event. In this case also + // append the data. + // + if (p._accumulateContent || ps.parsing != XML_PARSING) + { + assert(p._currentEvent == Characters); + p.value_.append(s, n); + } + else + { + p._currentEvent = Characters; + p.value_.assign(s, n); + + p._line = XML_GetCurrentLineNumber(p.p_); + p._column = XML_GetCurrentColumnNumber(p.p_); + + // In simple content we need to accumulate all the characters + // into a single event. To do this we will let the XMLStreamParser run + // until we reach the end of the element. 
+ // + if (cont == Content::Simple) + p._accumulateContent = true; + else + XML_StopParser(p.p_, true); + } +} + + +void XMLCALL XMLStreamParser::start_namespace_decl_(void* v, const XML_Char* prefix, const XML_Char* ns) +{ + XMLStreamParser& p(*static_cast(v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus(p.p_, &ps); + + // Expat has a (mis)-feature of a possibily calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + p.start_ns_.push_back(QName()); + p.start_ns_.back().prefix() = (prefix != 0 ? prefix : ""); + p.start_ns_.back().namespace_() = (ns != 0 ? ns : ""); +} + + +void XMLCALL XMLStreamParser::end_namespace_decl_(void* v, const XML_Char* prefix) +{ + XMLStreamParser& p(*static_cast(v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus(p.p_, &ps); + + // Expat has a (mis)-feature of a possibily calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + p.end_ns_.push_back(QName()); + p.end_ns_.back().prefix() = (prefix != 0 ? prefix : ""); +} + + +} +} diff --git a/XML/src/XMLStreamParserException.cpp b/XML/src/XMLStreamParserException.cpp new file mode 100644 index 000000000..9a7b55288 --- /dev/null +++ b/XML/src/XMLStreamParserException.cpp @@ -0,0 +1,87 @@ +// +// XMLStreamParserException.cpp +// +// $Id$ +// +// Library: XML +// Package: XML +// Module: XMLStreamParserException +// +// Definition of the XMLStreamParserException class. +// +// Copyright (c) 2004-2015, Applied Informatics Software Engineering GmbH. +// and Contributors. 
+// +// SPDX-License-Identifier: BSL-1.0 +// copyright : Copyright (c) 2013-2014 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + + +#include "Poco/XML/XMLStreamParserException.h" +#include "Poco/XML/XMLStreamParser.h" + +using namespace std; + +namespace Poco +{ +namespace XML +{ + + +XMLStreamParserException::~XMLStreamParserException() throw () +{ +} + + +XMLStreamParserException::XMLStreamParserException(const string& n, Poco::UInt64 l, Poco::UInt64 c, const string& d) + : _name(n), _line(l), _column(c), _description(d) +{ + init(); +} + + +XMLStreamParserException::XMLStreamParserException(const XMLStreamParser& p, const std::string& d) + : _name(p.inputName()), _line(p.line()), _column(p.column()), _description(d) +{ + init(); +} + + +void XMLStreamParserException::init() +{ + std::ostringstream os; + if (!_name.empty()) + os << _name << ':'; + os << _line << ':' << _column << ": error: " << _description; + _what = os.str(); +} + + +const char* XMLStreamParserException::name() const throw() +{ + return _name.c_str(); +} + +Poco::UInt64 XMLStreamParserException::line() const +{ + return _line; +} + +Poco::UInt64 XMLStreamParserException::column() const +{ + return _column; +} + +const std::string& XMLStreamParserException::description() const +{ + return _description; +} + +char const* XMLStreamParserException::what() const throw () +{ + return _what.c_str(); +} + + +} /* namespace XML */ +} /* namespace Poco */ diff --git a/XML/testsuite/Makefile b/XML/testsuite/Makefile index b71638b23..f2818ddf0 100644 --- a/XML/testsuite/Makefile +++ b/XML/testsuite/Makefile @@ -12,7 +12,8 @@ objects = AttributesImplTest ChildNodesTest DOMTestSuite DocumentTest \ DocumentTypeTest Driver ElementTest EventTest NamePoolTest NameTest \ NamespaceSupportTest NodeIteratorTest NodeTest ParserWriterTest \ SAXParserTest SAXTestSuite TextTest TreeWalkerTest \ - XMLTestSuite XMLWriterTest NodeAppenderTest + XMLTestSuite XMLWriterTest NodeAppenderTest 
XMLStreamSerializerTestSuite \ + XMLStreamParserTestSuite target = testrunner target_version = 1 diff --git a/XML/testsuite/src/XMLStreamParserTestSuite.cpp b/XML/testsuite/src/XMLStreamParserTestSuite.cpp new file mode 100644 index 000000000..6af418e20 --- /dev/null +++ b/XML/testsuite/src/XMLStreamParserTestSuite.cpp @@ -0,0 +1,489 @@ +// +// XMLStreamParserTestSuite.cpp +// +// $Id: //poco/1.4/XML/testsuite/src/XMLStreamParserTestSuite.cpp#4 $ +// +// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. +// and Contributors. +// +// SPDX-License-Identifier: BSL-1.0 +// + +#include "XMLStreamParserTestSuite.h" +#include "CppUnit/TestCaller.h" +#include "CppUnit/TestSuite.h" +#include "Poco/XML/XMLStreamParser.h" +#include "Poco/Exception.h" +#include +#include +#include +#include + +using namespace Poco::XML; +using namespace std; + +XMLStreamParserTestSuite::XMLStreamParserTestSuite(const std::string& name) + : CppUnit::TestCase(name) +{ +} + +XMLStreamParserTestSuite::~XMLStreamParserTestSuite() +{ +} + +void XMLStreamParserTestSuite::testParser() +{ + // Test error handling. + // + try + { + istringstream is("X"); + XMLStreamParser p(is, "test"); + + poco_assert(p.next() == XMLStreamParser::StartElement); + poco_assert(p.next() == XMLStreamParser::StartElement); + poco_assert(p.next() == XMLStreamParser::Characters && p.value() == "X"); + p.next(); + poco_assert(false); + } catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + try + { + istringstream is(""); + is.exceptions(ios_base::badbit | ios_base::failbit); + XMLStreamParser p(is, "test"); + + is.setstate(ios_base::badbit); + p.next(); + poco_assert(false); + } catch (const ios_base::failure&) + { + } + + // Test the nextExpect() functionality. 
+ // + { + istringstream is(""); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::StartElement, "root"); + p.nextExpect(XMLStreamParser::EndElement); + } + + try + { + istringstream is(""); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::EndElement); + poco_assert(false); + } catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + try + { + istringstream is(""); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::StartElement, "root1"); + poco_assert(false); + } catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + // Test nextExpect() with content setting. + // + { + istringstream is(" "); + XMLStreamParser p(is, "empty"); + + p.nextExpect(XMLStreamParser::StartElement, "root", Content::Empty); + p.nextExpect(XMLStreamParser::EndElement); + p.nextExpect(XMLStreamParser::Eof); + } + + // Test namespace declarations. + // + { + // Followup end element event that should be precedeeded by end + // namespace declaration. + // + istringstream is(""); + XMLStreamParser p(is, "test", XMLStreamParser::RECEIVE_DEFAULT | XMLStreamParser::RECEIVE_NAMESPACE_DECLS); + + p.nextExpect(XMLStreamParser::StartElement, "root"); + p.nextExpect(XMLStreamParser::StartNamespaceDecl); + p.nextExpect(XMLStreamParser::EndNamespaceDecl); + p.nextExpect(XMLStreamParser::EndElement); + } + + // Test value extraction. + // + { + istringstream is("123"); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::StartElement, "root"); + p.nextExpect(XMLStreamParser::Characters); + poco_assert(p.value() == 123); + p.nextExpect(XMLStreamParser::EndElement); + } + + // Test attribute maps. 
+ // + { + istringstream is(""); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::StartElement, "root"); + + poco_assert(p.attribute("a") == "a"); + poco_assert(p.attribute("b", "B") == "b"); + poco_assert(p.attribute("c", "C") == "C"); + poco_assert(p.attribute("d") == 123); + poco_assert(p.attribute("t") == true); + poco_assert(p.attribute("f", false) == false); + + p.nextExpect(XMLStreamParser::EndElement); + } + + { + istringstream is(""); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::StartElement, "root"); + poco_assert(p.attribute("a") == "a"); + poco_assert(p.peek() == XMLStreamParser::StartElement && p.name() == "nested"); + poco_assert(p.attribute("a") == "a"); + p.nextExpect(XMLStreamParser::StartElement, "nested"); + poco_assert(p.attribute("a") == "A"); + p.nextExpect(XMLStreamParser::StartElement, "inner"); + poco_assert(p.attribute("a", "") == ""); + p.nextExpect(XMLStreamParser::EndElement); + poco_assert(p.attribute("a") == "A"); + poco_assert(p.peek() == XMLStreamParser::EndElement); + poco_assert(p.attribute("a") == "A"); // Still valid. + p.nextExpect(XMLStreamParser::EndElement); + poco_assert(p.attribute("a") == "a"); + p.nextExpect(XMLStreamParser::EndElement); + poco_assert(p.attribute("a", "") == ""); + } + + try + { + istringstream is(""); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::StartElement, "root"); + poco_assert(p.attribute("a") == "a"); + p.nextExpect(XMLStreamParser::EndElement); + poco_assert(false); + } catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + try + { + istringstream is(""); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::StartElement, "root"); + p.attribute("a"); + poco_assert(false); + } catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + // Test peeking and getting the current event. 
+ // + { + istringstream is("x"); + XMLStreamParser p(is, "peek", XMLStreamParser::RECEIVE_DEFAULT | XMLStreamParser::RECEIVE_ATTRIBUTES_EVENT); + + poco_assert(p.event() == XMLStreamParser::Eof); + + poco_assert(p.peek() == XMLStreamParser::StartElement); + poco_assert(p.next() == XMLStreamParser::StartElement); + poco_assert(p.event() == XMLStreamParser::StartElement); + + poco_assert(p.peek() == XMLStreamParser::StartAttribute); + poco_assert(p.event() == XMLStreamParser::StartAttribute); + poco_assert(p.next() == XMLStreamParser::StartAttribute); + + poco_assert(p.peek() == XMLStreamParser::Characters && p.value() == "x"); + poco_assert(p.next() == XMLStreamParser::Characters && p.value() == "x"); + poco_assert(p.event() == XMLStreamParser::Characters && p.value() == "x"); + + poco_assert(p.peek() == XMLStreamParser::EndAttribute); + poco_assert(p.event() == XMLStreamParser::EndAttribute); + poco_assert(p.next() == XMLStreamParser::EndAttribute); + + poco_assert(p.peek() == XMLStreamParser::Characters && p.value() == "x"); + poco_assert(p.next() == XMLStreamParser::Characters && p.value() == "x"); + poco_assert(p.event() == XMLStreamParser::Characters && p.value() == "x"); + + poco_assert(p.peek() == XMLStreamParser::StartElement); + poco_assert(p.next() == XMLStreamParser::StartElement); + poco_assert(p.event() == XMLStreamParser::StartElement); + + poco_assert(p.peek() == XMLStreamParser::EndElement); + poco_assert(p.next() == XMLStreamParser::EndElement); + poco_assert(p.event() == XMLStreamParser::EndElement); + + poco_assert(p.peek() == XMLStreamParser::EndElement); + poco_assert(p.next() == XMLStreamParser::EndElement); + poco_assert(p.event() == XMLStreamParser::EndElement); + + poco_assert(p.peek() == XMLStreamParser::Eof); + poco_assert(p.next() == XMLStreamParser::Eof); + poco_assert(p.event() == XMLStreamParser::Eof); + } + + // Test content processing. 
+ // + + // empty + // + { + istringstream is(" \n\t "); + XMLStreamParser p(is, "empty", XMLStreamParser::RECEIVE_DEFAULT | XMLStreamParser::RECEIVE_ATTRIBUTES_EVENT); + + poco_assert(p.next() == XMLStreamParser::StartElement); + p.content(Content::Empty); + poco_assert(p.next() == XMLStreamParser::StartAttribute); + poco_assert(p.next() == XMLStreamParser::Characters && p.value() == " x "); + poco_assert(p.next() == XMLStreamParser::EndAttribute); + poco_assert(p.next() == XMLStreamParser::EndElement); + poco_assert(p.next() == XMLStreamParser::Eof); + } + + try + { + istringstream is(" \n & X \t "); + XMLStreamParser p(is, "empty"); + + poco_assert(p.next() == XMLStreamParser::StartElement); + p.content(Content::Empty); + p.next(); + poco_assert(false); + } catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + // simple + // + { + istringstream is(" X "); + XMLStreamParser p(is, "simple"); + + poco_assert(p.next() == XMLStreamParser::StartElement); + p.content(Content::Simple); + poco_assert(p.next() == XMLStreamParser::Characters && p.value() == " X "); + poco_assert(p.next() == XMLStreamParser::EndElement); + poco_assert(p.next() == XMLStreamParser::Eof); + } + + try + { + istringstream is(" ? "); + XMLStreamParser p(is, "simple"); + + poco_assert(p.next() == XMLStreamParser::StartElement); + p.content(Content::Simple); + poco_assert(p.next() == XMLStreamParser::Characters && p.value() == " ? "); + p.next(); + poco_assert(false); + } catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + { + // Test content accumulation in simple content. 
+ // + istringstream is("123"); + XMLStreamParser p(is, "simple", XMLStreamParser::RECEIVE_DEFAULT | XMLStreamParser::RECEIVE_NAMESPACE_DECLS); + + poco_assert(p.next() == XMLStreamParser::StartElement); + p.nextExpect(XMLStreamParser::StartNamespaceDecl); + p.content(Content::Simple); + poco_assert(p.next() == XMLStreamParser::Characters && p.value() == "123"); + p.nextExpect(XMLStreamParser::EndNamespaceDecl); + poco_assert(p.next() == XMLStreamParser::EndElement); + poco_assert(p.next() == XMLStreamParser::Eof); + } + + try + { + // Test error handling in accumulation in simple content. + // + istringstream is("123"); + XMLStreamParser p(is, "simple", XMLStreamParser::RECEIVE_DEFAULT | XMLStreamParser::RECEIVE_NAMESPACE_DECLS); + + poco_assert(p.next() == XMLStreamParser::StartElement); + p.nextExpect(XMLStreamParser::StartNamespaceDecl); + p.content(Content::Simple); + p.next(); + poco_assert(false); + } catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + // complex + // + { + istringstream is("\n" + " \n" + " \n" + " X \n" + " \n" + "\n"); + XMLStreamParser p(is, "complex", XMLStreamParser::RECEIVE_DEFAULT | XMLStreamParser::RECEIVE_ATTRIBUTES_EVENT); + + poco_assert(p.next() == XMLStreamParser::StartElement); // root + p.content(Content::Complex); + + poco_assert(p.next() == XMLStreamParser::StartAttribute); + poco_assert(p.next() == XMLStreamParser::Characters && p.value() == " x "); + poco_assert(p.next() == XMLStreamParser::EndAttribute); + + poco_assert(p.next() == XMLStreamParser::StartElement); // nested + p.content(Content::Complex); + + poco_assert(p.next() == XMLStreamParser::StartElement); // inner + p.content(Content::Empty); + poco_assert(p.next() == XMLStreamParser::EndElement); // inner + + poco_assert(p.next() == XMLStreamParser::StartElement); // inner + p.content(Content::Simple); + poco_assert(p.next() == XMLStreamParser::Characters && p.value() == " X "); + poco_assert(p.next() == XMLStreamParser::EndElement); // 
inner + + poco_assert(p.next() == XMLStreamParser::EndElement); // nested + poco_assert(p.next() == XMLStreamParser::EndElement); // root + poco_assert(p.next() == XMLStreamParser::Eof); + } + + try + { + istringstream is(" \n X X "); + XMLStreamParser p(is, "complex"); + + poco_assert(p.next() == XMLStreamParser::StartElement); + p.content(Content::Complex); + poco_assert(p.next() == XMLStreamParser::StartElement); + poco_assert(p.next() == XMLStreamParser::EndElement); + p.next(); + poco_assert(false); + } catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + // Test element with simple content helpers. + // + { + istringstream is("" + " X" + " " + " 123" + " Y" + " Z" + " 234" + " 345" + " A" + " B" + " A" + " B" + " 1" + " 2" + " 1" + " 2" + ""); + XMLStreamParser p(is, "element"); + + p.nextExpect(XMLStreamParser::StartElement, "root", Content::Complex); + + p.nextExpect(XMLStreamParser::StartElement, "nested"); + poco_assert(p.element() == "X"); + + p.nextExpect(XMLStreamParser::StartElement, "nested"); + poco_assert(p.element() == ""); + + p.nextExpect(XMLStreamParser::StartElement, "nested"); + poco_assert(p.element() == 123); + + poco_assert(p.element("nested") == "Y"); + poco_assert(p.element(QName("test", "nested")) == "Z"); + + poco_assert(p.element("nested") == 234); + poco_assert(p.element(QName("test", "nested")) == 345); + + poco_assert(p.element("nested", "a") == "A"); + poco_assert(p.element(QName("test", "nested"), "b") == "B"); + + poco_assert(p.element("nested", "a") == "a" && p.element("nested1") == "A"); + poco_assert(p.element(QName("test", "nested"), "b") == "b" && p.element(QName("test", "nested1")) == "B"); + + poco_assert(p.element("nested", 10) == 1); + poco_assert(p.element(QName("test", "nested"), 20) == 2); + + poco_assert(p.element("nested", 10) == 10 && p.element("nested1") == 1); + poco_assert(p.element(QName("test", "nested"), 20) == 20 && p.element(QName("test", "nested1")) == 2); + + 
p.nextExpect(XMLStreamParser::EndElement); + } + + // Test the iterator interface. + // + { + istringstream is("X"); + XMLStreamParser p(is, "iterator"); + + vector v; + + for (XMLStreamParser::Iterator i(p.begin()); i != p.end(); ++i) + v.push_back(*i); + + //for (XMLStreamParser::EventType e: p) + // v.push_back (e); + + poco_assert(v.size() == 5); + poco_assert(v[0] == XMLStreamParser::StartElement); + poco_assert(v[1] == XMLStreamParser::StartElement); + poco_assert(v[2] == XMLStreamParser::Characters); + poco_assert(v[3] == XMLStreamParser::EndElement); + poco_assert(v[4] == XMLStreamParser::EndElement); + } + + // Test space extraction into the std::string value. + // + { + istringstream is(" b "); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::StartElement, "root"); + poco_assert(p.attribute("a") == " a "); + p.nextExpect(XMLStreamParser::Characters); + poco_assert(p.value() == " b "); + p.nextExpect(XMLStreamParser::EndElement); + } +} + +void XMLStreamParserTestSuite::setUp() +{ +} + +void XMLStreamParserTestSuite::tearDown() +{ +} + +CppUnit::Test* XMLStreamParserTestSuite::suite() +{ + CppUnit::TestSuite* pSuite = new CppUnit::TestSuite("XMLStreamParserTestSuite"); + + CppUnit_addTest(pSuite, XMLStreamParserTestSuite, testParser); + + return pSuite; +} diff --git a/XML/testsuite/src/XMLStreamParserTestSuite.h b/XML/testsuite/src/XMLStreamParserTestSuite.h new file mode 100644 index 000000000..5626448ae --- /dev/null +++ b/XML/testsuite/src/XMLStreamParserTestSuite.h @@ -0,0 +1,40 @@ +// +// XMLStreamParserTestSuite.h +// +// $Id: //poco/1.4/XML/testsuite/src/XMLStreamParserTestSuite.h#2 $ +// +// Definition of the XMLStreamParserTestSuite class. +// +// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. +// and Contributors. 
+// +// SPDX-License-Identifier: BSL-1.0 +// + + +#ifndef XMLStreamParserTestSuite_INCLUDED +#define XMLStreamParserTestSuite_INCLUDED + + +#include "Poco/XML/XML.h" +#include "CppUnit/TestCase.h" + + +class XMLStreamParserTestSuite: public CppUnit::TestCase +{ +public: + XMLStreamParserTestSuite(const std::string& name); + ~XMLStreamParserTestSuite(); + + void testParser(); + + void setUp(); + void tearDown(); + + static CppUnit::Test* suite(); + +private: +}; + + +#endif // XMLStreamParserTestSuite_INCLUDED diff --git a/XML/testsuite/src/XMLTestSuite.cpp b/XML/testsuite/src/XMLTestSuite.cpp index 28bb213ed..2ecc54c04 100644 --- a/XML/testsuite/src/XMLTestSuite.cpp +++ b/XML/testsuite/src/XMLTestSuite.cpp @@ -16,7 +16,7 @@ #include "XMLWriterTest.h" #include "SAXTestSuite.h" #include "DOMTestSuite.h" - +#include "XMLStreamParserTestSuite.h" CppUnit::Test* XMLTestSuite::suite() { @@ -27,6 +27,7 @@ CppUnit::Test* XMLTestSuite::suite() pSuite->addTest(XMLWriterTest::suite()); pSuite->addTest(SAXTestSuite::suite()); pSuite->addTest(DOMTestSuite::suite()); + pSuite->addTest(XMLStreamParserTestSuite::suite()); return pSuite; }