diff --git a/XML/Makefile b/XML/Makefile index 0d8991fc7..7d25ee903 100644 --- a/XML/Makefile +++ b/XML/Makefile @@ -12,17 +12,18 @@ COMMONFLAGS += -DXML_NS -DXML_DTD -DHAVE_EXPAT_CONFIG_H objects = AbstractContainerNode AbstractNode Attr AttrMap Attributes \ AttributesImpl CDATASection CharacterData ChildNodesList Comment \ - ContentHandler DOMBuilder DOMException DOMImplementation DOMObject \ + Content ContentHandler DOMBuilder DOMException DOMImplementation DOMObject \ DOMParser DOMSerializer DOMWriter DTDHandler DTDMap DeclHandler \ DefaultHandler Document DocumentEvent DocumentFragment DocumentType \ Element ElementsByTagNameList Entity EntityReference EntityResolver \ EntityResolverImpl ErrorHandler Event EventDispatcher EventException \ EventListener EventTarget InputSource LexicalHandler Locator LocatorImpl \ MutationEvent Name NamePool NamedNodeMap NamespaceStrategy \ - NamespaceSupport Node NodeFilter NodeIterator NodeList Notation \ - ParserEngine ProcessingInstruction SAXException SAXParser Text \ + NamespaceSupport NodeAppender Node NodeFilter NodeIterator NodeList Notation \ + ParserEngine ProcessingInstruction QName SAXException SAXParser Text \ TreeWalker WhitespaceFilter XMLException XMLFilter XMLFilterImpl XMLReader \ - XMLString XMLWriter NodeAppender + XMLString XMLWriter XMLStreamParser XMLStreamParserException XMLStreamSerializer \ + XMLStreamSerializerException char-props genx expat_objects = xmlparse xmlrole xmltok diff --git a/XML/include/Poco/XML/Content.h b/XML/include/Poco/XML/Content.h new file mode 100644 index 000000000..2dde448c0 --- /dev/null +++ b/XML/include/Poco/XML/Content.h @@ -0,0 +1,43 @@ +// file : xml/content -*- C++ -*- +// copyright : Copyright (c) 2013-2014 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef POCO_XML_CONTENT +#define POCO_XML_CONTENT + +namespace Poco +{ +namespace XML +{ + +/// XML content model. C++11 enum class emulated for C++98. 
+struct Content +{ + enum value + { + // element characters whitespaces notes + Empty, // no no ignored + Simple, // no yes preserved content accumulated + Complex, // yes no ignored + Mixed // yes yes preserved + }; + + Content(value v) + : v_(v) + { + } + + operator value() const + { + return v_; + } + +private: + value v_; +}; + + +} +} + +#endif // POCO_XML_CONTENT diff --git a/XML/include/Poco/XML/QName.h b/XML/include/Poco/XML/QName.h new file mode 100644 index 000000000..8602915d4 --- /dev/null +++ b/XML/include/Poco/XML/QName.h @@ -0,0 +1,103 @@ +// file : cutl/xml/QName.hxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef POCO_XML_QNAME_HXX +#define POCO_XML_QNAME_HXX + +#include "Poco/XML/XML.h" + +#include +#include + + +namespace Poco +{ +namespace XML +{ + +// Note that the optional prefix is just "syntactic sugar". In +// particular, it is ignored by the comparison operators and the +// std::ostream insertion operator. +// +class XML_API QName +{ +public: + QName() + { + } + QName(const std::string& name) : + name_(name) + { + } + QName(const std::string& ns, const std::string& name) : + ns_(ns), + name_(name) + { + } + QName(const std::string& ns, const std::string& name, const std::string& prefix) : + ns_(ns), + name_(name), + prefix_(prefix) + { + } + + const std::string& namespace_() const + { + return ns_; + } + const std::string& name() const + { + return name_; + } + const std::string& prefix() const + { + return prefix_; + } + + std::string& namespace_() + { + return ns_; + } + std::string& name() + { + return name_; + } + std::string& prefix() + { + return prefix_; + } + + // Printable representation in the [namespace#]name form. 
+ // + std::string string() const; + + // Note that comparison operators ignore the prefix. + // +public: + friend bool operator<(const QName& x, const QName& y) + { + return x.ns_ < y.ns_ || (x.ns_ == y.ns_ && x.name_ < y.name_); + } + + friend bool operator==(const QName& x, const QName& y) + { + return x.ns_ == y.ns_ && x.name_ == y.name_; + } + + friend bool operator!=(const QName& x, const QName& y) + { + return !(x == y); + } + +private: + std::string ns_; + std::string name_; + std::string prefix_; +}; + +XML_API std::ostream& operator<<(std::ostream&, const QName&); +} +} + +#endif // POCO_XML_QNAME_HXX diff --git a/XML/include/Poco/XML/ValueTraits.h b/XML/include/Poco/XML/ValueTraits.h new file mode 100644 index 000000000..18088dbae --- /dev/null +++ b/XML/include/Poco/XML/ValueTraits.h @@ -0,0 +1,90 @@ +// file : cutl/xml/value-traits.hxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef POCO_XML_VALUE_TRAITS_HXX +#define POCO_XML_VALUE_TRAITS_HXX + +#include +#include // std::size_t +#include +#include + +#include "XMLStreamParserException.h" +#include "XMLStreamSerializerException.h" + +namespace Poco +{ +namespace XML +{ +class XMLStreamParser; +class XMLStreamSerializer; + +template +struct default_value_traits +{ + static T + parse(std::string, const XMLStreamParser&); + + static std::string + serialize(const T&, const XMLStreamSerializer&); +}; + +template<> +struct XML_API default_value_traits +{ + static bool + parse(std::string, const XMLStreamParser&); + + static std::string serialize(bool v, const XMLStreamSerializer&) + { + return v ? 
"true" : "false"; + } +}; + +template<> +struct XML_API default_value_traits +{ + static std::string parse(std::string s, const XMLStreamParser&) + { + return s; + } + + static std::string serialize(const std::string& v, const XMLStreamSerializer&) + { + return v; + } +}; + +template +struct ValueTraits: default_value_traits +{ +}; + +template +struct ValueTraits : default_value_traits +{ +}; + +template +T default_value_traits::parse(std::string s, const XMLStreamParser& p) +{ + T r; + std::istringstream is(s); + if (!(is >> r && is.eof())) + throw XMLStreamParserException(p, "invalid value '" + s + "'"); + return r; +} + +template +std::string default_value_traits::serialize(const T& v, const XMLStreamSerializer& s) +{ + std::ostringstream os; + if (!(os << v)) + throw XMLStreamSerializerException(s, "invalid value"); + return os.str(); +} +} +} + +#endif // CUTL_XML_VALUE_TRAITS_HXX diff --git a/XML/include/Poco/XML/XMLStreamParser.h b/XML/include/Poco/XML/XMLStreamParser.h new file mode 100644 index 000000000..37dd9b394 --- /dev/null +++ b/XML/include/Poco/XML/XMLStreamParser.h @@ -0,0 +1,614 @@ +// file : XMLStreamParser.hxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef POCO_XML_PARSER_HXX +#define POCO_XML_PARSER_HXX + +// We only support UTF-8 expat. +// +#ifdef XML_UNICODE +# error UTF-16 expat (XML_UNICODE defined) is not supported +#endif + +#include "Poco/XML/QName.h" +#include "Poco/XML/ValueTraits.h" +#include "Poco/XML/Content.h" +#include + +#include +#include +#include +#include +#include // std::size_t +#include + + +namespace Poco +{ +namespace XML +{ +class XML_API XMLStreamParser +{ +public: + typedef unsigned short FeatureType; + + // If both receive_attributes_event and receive_attributes_map are + // specified, then receive_attributes_event is assumed. 
+ // + static const FeatureType RECEIVE_ELEMENTS = 0x0001; + static const FeatureType RECEIVE_CHARACTERS = 0x0002; + static const FeatureType RECEIVE_ATTRIBUTE_MAP = 0x0004; + static const FeatureType RECEIVE_ATTRIBUTES_EVENT = 0x0008; + static const FeatureType RECEIVE_NAMESPACE_DECLS = 0x0010; + + static const FeatureType RECEIVE_DEFAULT = RECEIVE_ELEMENTS | RECEIVE_CHARACTERS | RECEIVE_ATTRIBUTE_MAP; + + // Parse std::istream. Input name is used in diagnostics to identify + // the document being parsed. + // + // If stream exceptions are enabled then std::ios_base::failure + // exception is used to report io errors (badbit and failbit). + // Otherwise, those are reported as the parsing exception. + // + XMLStreamParser(std::istream&, const std::string& input_name, FeatureType = RECEIVE_DEFAULT); + + // Parse memory buffer that contains the whole document. Input name + // is used in diagnostics to identify the document being parsed. + // + XMLStreamParser(const void* data, std::size_t size, const std::string& input_name, FeatureType = RECEIVE_DEFAULT); + + const std::string& input_name() const + { + return iname_; + } + + ~XMLStreamParser(); + + /// Parsing events. + enum EventType + { + // If adding new events, also update the stream insertion operator. + // + StartElement, + EndElement, + StartAttribute, + EndAttribute, + Characters, + StartNamespaceDecl, + EndNamespaceDecl, + Eof + }; + + EventType next(); + + // Get the next event and make sure that it's what's expected. If it + // is not, then throw an appropriate parsing exception. + // + void nextExpect(EventType); + + void nextExpect(EventType, const std::string& name); + + void nextExpect(EventType, const QName& qname); + + void nextExpect(EventType, const std::string& ns, const std::string& name); + + EventType peek(); + + // Return the event that was last returned by the call to next() or + // peek(). + // + EventType event() + { + return event_; + } + + // Event data. 
+ // + const QName& qname() const + { + return *pqname_; + } + + const std::string& namespace_() const + { + return pqname_->namespace_(); + } + + const std::string& name() const + { + return pqname_->name(); + } + + const std::string& prefix() const + { + return pqname_->prefix(); + } + + std::string& value() + { + return *pvalue_; + } + + const std::string& value() const + { + return *pvalue_; + } + + template T value() const; + + Poco::UInt64 line() const + { + return line_; + } + + Poco::UInt64 column() const + { + return column_; + } + + // Attribute map lookup. If attribute is not found, then the version + // without the default value throws an appropriate parsing exception + // while the version with the default value returns that value. + // + // Note also that there is no attribute(ns,name) version since it + // would conflict with attribute(name,dv) (qualified attributes + // are not very common). + // + // Attribute map is valid throughout at the "element level" until + // end_element and not just during startElement. As a special case, + // the map is still valid after peek() that returned end_element until + // this end_element event is retrieved with next(). + // + const std::string& + attribute(const std::string& name) const; + + template + T attribute(const std::string& name) const; + + std::string attribute(const std::string& name, const std::string& default_value) const; + + template + T attribute(const std::string& name, const T& default_value) const; + + const std::string& attribute(const QName& qname) const; + + template + T attribute(const QName& qname) const; + + std::string attribute(const QName& qname, const std::string& default_value) const; + + template + T attribute(const QName& qname, const T& default_value) const; + + bool attributePresent(const std::string& name) const; + + bool attributePresent(const QName& qname) const; + + // Low-level attribute map access. Note that this API assumes + // all attributes are handled. 
+ // + struct AttributeValueType + { + std::string value; + mutable bool handled; + }; + + typedef std::map AttributeMapType; + + const AttributeMapType& attributeMap() const; + + // Optional content processing. + // + + // Note that you cannot get/set content while peeking. + // + void content(Content); + + Content content() const; + + // Versions that also set the content. Event type must be StartElement. + // + void nextExpect(EventType, const std::string& name, Content); + + void nextExpect(EventType, const QName& qname, Content); + + void nextExpect(EventType, const std::string& ns, const std::string& name, Content); + + // Helpers for parsing elements with simple content. The first two + // functions assume that StartElement has already been parsed. The + // rest parse the complete element, from start to end. + // + // Note also that as with attribute(), there is no (namespace,name) + // overload since it would conflict with (name,default_value). + // + std::string element(); + + template + T element(); + + std::string element(const std::string& name); + + std::string element(const QName& qname); + + template + T element(const std::string& name); + + template + T element(const QName& qname); + + std::string element(const std::string& name, const std::string& default_value); + + std::string element(const QName& qname, const std::string& default_value); + + template + T element(const std::string& name, const T& default_value); + + template + T element(const QName& qname, const T& default_value); + + // C++11 range-based for support. Generally, the iterator interface + // doesn't make much sense for the XMLStreamParser so for now we have an + // implementation that is just enough for the range-based for. 
+ // + struct Iterator + { + typedef EventType value_type; + + Iterator(XMLStreamParser* p = 0, EventType e = Eof) : + p_(p), + e_(e) + { + } + value_type operator*() const + { + return e_; + } + Iterator& operator++() + { + e_ = p_->next(); + return *this; + } + + // Comparison only makes sense when comparing to end (eof). + // + bool operator==(Iterator y) const + { + return e_ == Eof && y.e_ == Eof; + } + bool operator!=(Iterator y) const + { + return !(*this == y); + } + + private: + XMLStreamParser* p_; + EventType e_; + }; + + Iterator begin() + { + return Iterator(this, next()); + } + Iterator end() + { + return Iterator(this, Eof); + } + +private: + XMLStreamParser(const XMLStreamParser&); + XMLStreamParser& operator=(const XMLStreamParser&); + + static void XMLCALL start_element_(void*, const XML_Char*, const XML_Char**); + static void XMLCALL end_element_(void*, const XML_Char*); + static void XMLCALL characters_(void*, const XML_Char*, int); + static void XMLCALL start_namespace_decl_(void*, const XML_Char*, const XML_Char*); + static void XMLCALL end_namespace_decl_(void*, const XML_Char*); + + void init(); + EventType next_(bool peek); + EventType next_body(); + void handle_error(); + + // If size_ is 0, then data is std::istream. Otherwise, it is a buffer. + // + union + { + std::istream* is; + const void* buf; + }data_; + + std::size_t size_; + + const std::string iname_; + FeatureType feature_; + + XML_Parser p_; + std::size_t depth_; + bool accumulate_; // Whether we are accumulating character content. + enum + { + state_next, state_peek + }state_; + EventType event_; + EventType queue_; + + QName qname_; + std::string value_; + + // These are used to avoid copying when we are handling attributes + // and namespace decls. + // + const QName* pqname_; + std::string* pvalue_; + + Poco::UInt64 line_; + Poco::UInt64 column_; + + // Attributes as events. 
+ // + struct attribute_type + { + QName qname; + std::string value; + }; + + typedef std::vector attributes; + + attributes attr_; + attributes::size_type attr_i_; // Index of the current attribute. + + // Namespace declarations. + // + typedef std::vector namespace_decls; + + namespace_decls start_ns_; + namespace_decls::size_type start_ns_i_;// Index of the current decl. + + namespace_decls end_ns_; + namespace_decls::size_type end_ns_i_;// Index of the current decl. + + // Element state consisting of the content model and attribute map. + // + struct ElementEntry + { + ElementEntry(std::size_t d, Content c = Content::Mixed) : + depth(d), + content(c), + attr_unhandled_(0) + { + } + + std::size_t depth; + Content content; + AttributeMapType attr_map_; + mutable AttributeMapType::size_type attr_unhandled_; + }; + + typedef std::vector ElementState; + std::vector element_state_; + + // Empty attribute map to return when an element has no attributes. + // + const AttributeMapType empty_attr_map_; + + // Return the element entry corresponding to the current depth, if + // exists, and NULL otherwise. + // + const ElementEntry* getElement() const; + + const ElementEntry* get_element_() const; + + void pop_element(); +}; + +XML_API std::ostream& operator<<(std::ostream&, XMLStreamParser::EventType); + +inline XMLStreamParser::XMLStreamParser(std::istream& is, const std::string& iname, FeatureType f) + : size_(0), iname_(iname), feature_(f) +{ + data_.is = &is; + init(); +} + +inline XMLStreamParser::XMLStreamParser(const void* data, std::size_t size, const std::string& iname, FeatureType f) + : size_(size), iname_(iname), feature_(f) +{ + assert(data != 0 && size != 0); + + data_.buf = data; + init(); +} + +inline XMLStreamParser::EventType XMLStreamParser::peek() +{ + if (state_ == state_peek) + return event_; + else + { + EventType e(next_(true)); + state_ = state_peek; // Set it after the call to next_(). 
+ return e; + } +} + +template +inline T XMLStreamParser::value() const +{ + return ValueTraits < T > ::parse(value(), *this); +} + +inline const std::string& XMLStreamParser::attribute(const std::string& n) const +{ + return attribute(QName(n)); +} + +template +inline T XMLStreamParser::attribute(const std::string& n) const +{ + return attribute < T > (QName(n)); +} + +inline std::string XMLStreamParser::attribute(const std::string& n, const std::string& dv) const +{ + return attribute(QName(n), dv); +} + +template +inline T XMLStreamParser::attribute(const std::string& n, const T& dv) const +{ + return attribute < T > (QName(n), dv); +} + +template +inline T XMLStreamParser::attribute(const QName& qn) const +{ + return ValueTraits < T > ::parse(attribute(qn), *this); +} + +inline bool XMLStreamParser::attributePresent(const std::string& n) const +{ + return attributePresent(QName(n)); +} + +inline const XMLStreamParser::AttributeMapType& XMLStreamParser::attributeMap() const +{ + if (const ElementEntry* e = getElement()) + { + e->attr_unhandled_ = 0; // Assume all handled. 
+ return e->attr_map_; + } + + return empty_attr_map_; +} + +inline void XMLStreamParser::nextExpect(EventType e, const QName& qn) +{ + nextExpect(e, qn.namespace_(), qn.name()); +} + +inline void XMLStreamParser::nextExpect(EventType e, const std::string& n) +{ + nextExpect(e, std::string(), n); +} + +inline void XMLStreamParser::nextExpect(EventType e, const QName& qn, Content c) +{ + nextExpect(e, qn); + assert(e == StartElement); + content(c); +} + +inline void XMLStreamParser::nextExpect(EventType e, const std::string& n, Content c) +{ + nextExpect(e, std::string(), n); + assert(e == StartElement); + content(c); +} + +inline void XMLStreamParser::nextExpect(EventType e, const std::string& ns, const std::string& n, Content c) +{ + nextExpect(e, ns, n); + assert(e == StartElement); + content(c); +} + +template +inline T XMLStreamParser::element() +{ + return ValueTraits < T > ::parse(element(), *this); +} + +inline std::string XMLStreamParser::element(const std::string& n) +{ + nextExpect(StartElement, n); + return element(); +} + +inline std::string XMLStreamParser::element(const QName& qn) +{ + nextExpect(StartElement, qn); + return element(); +} + +template +inline T XMLStreamParser::element(const std::string& n) +{ + return ValueTraits < T > ::parse(element(n), *this); +} + +template +inline T XMLStreamParser::element(const QName& qn) +{ + return ValueTraits < T > ::parse(element(qn), *this); +} + +inline std::string XMLStreamParser::element(const std::string& n, const std::string& dv) +{ + return element(QName(n), dv); +} + +template +inline T XMLStreamParser::element(const std::string& n, const T& dv) +{ + return element < T > (QName(n), dv); +} + +inline void XMLStreamParser::content(Content c) +{ + assert(state_ == state_next); + + if (!element_state_.empty() && element_state_.back().depth == depth_) + element_state_.back().content = c; + else + element_state_.push_back(ElementEntry(depth_, c)); +} + +inline Content XMLStreamParser::content() const +{ + 
+ assert(state_ == state_next); + + return !element_state_.empty() && element_state_.back().depth == depth_ ? element_state_.back().content : Content(Content::Mixed); +} + +inline const XMLStreamParser::ElementEntry* XMLStreamParser::getElement() const +{ + return element_state_.empty() ? 0 : get_element_(); +} + +template +T XMLStreamParser::attribute(const QName& qn, const T& dv) const +{ + if (const ElementEntry* e = getElement()) + { + AttributeMapType::const_iterator i(e->attr_map_.find(qn)); + + if (i != e->attr_map_.end()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attr_unhandled_--; + } + return ValueTraits < T > ::parse(i->second.value, *this); + } + } + + return dv; +} + +template +T XMLStreamParser::element(const QName& qn, const T& dv) +{ + if (peek() == StartElement && qname() == qn) + { + next(); + return element(); + } + + return dv; +} +} +} + +#endif // POCO_XML_PARSER_HXX diff --git a/XML/include/Poco/XML/XMLStreamParserException.h b/XML/include/Poco/XML/XMLStreamParserException.h new file mode 100644 index 000000000..a7816d6a1 --- /dev/null +++ b/XML/include/Poco/XML/XMLStreamParserException.h @@ -0,0 +1,69 @@ +/// +/// \package metamodel +/// \file XMLStreamParserException.h +/// +/// \author Marian Krivos +/// \date Aug 21, 2015 - 6:52:24 PM +/// \brief type definition +/// +/// (C) Copyright 2015 R-SYS,s.r.o +/// All rights reserved. 
+/// + +#ifndef POCO_XML_XMLSTREAMPARSEREXCEPTION_H_ +#define POCO_XML_XMLSTREAMPARSEREXCEPTION_H_ + +#include + +namespace Poco +{ +namespace XML +{ +class XMLStreamParser; + +class XML_API XMLStreamParserException : +public Poco::XML::XMLException +{ +public: + XMLStreamParserException(const std::string& name, Poco::UInt64 line, Poco::UInt64 column, const std::string& description); + + XMLStreamParserException(const XMLStreamParser&, const std::string& description); + + virtual ~XMLStreamParserException() throw (); + + const char* name() const throw() + { + return name_.c_str(); + } + + Poco::UInt64 line() const + { + return line_; + } + + Poco::UInt64 column() const + { + return column_; + } + + const std::string& description() const + { + return description_; + } + + virtual const char* what() const throw (); + +private: + void init(); + + std::string name_; + Poco::UInt64 line_; + Poco::UInt64 column_; + std::string description_; + std::string what_; +}; + +} +/* namespace XML */ +} /* namespace Poco */ +#endif /* POCO_XML_XMLSTREAMPARSEREXCEPTION_H_ */ diff --git a/XML/include/Poco/XML/XMLStreamSerializer.h b/XML/include/Poco/XML/XMLStreamSerializer.h new file mode 100644 index 000000000..84221cfb3 --- /dev/null +++ b/XML/include/Poco/XML/XMLStreamSerializer.h @@ -0,0 +1,239 @@ +// file : xml/XMLStreamSerializer -*- C++ -*- +// copyright : Copyright (c) 2013-2014 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef POCO_XML_XMLSERIALIZER +#define POCO_XML_XMLSERIALIZER + +#include "QName.h" +#include "ValueTraits.h" +#include "genx.h" + +#include +#include +#include // std::size_t + +namespace Poco +{ +namespace XML +{ +class XMLStreamSerializer; + +class XML_API XMLStreamSerializer +{ +public: + // Serialize to std::ostream. Output name is used in diagnostics to + // identify the document being serialized. The indentation argument + // specifies the number of indentation spaces that should be used for + // pretty-printing. 
If 0 is passed, no pretty-printing is performed. + // + // If stream exceptions are enabled then std::ios_base::failure + // exception is used to report io errors (badbit and failbit). + // Otherwise, those are reported as the XMLStreamSerializerException exception. + // + XMLStreamSerializer(std::ostream&, const std::string& output_name, unsigned short indentation = 2); + + const std::string& outputName() const + { + return oname_; + } + + ~XMLStreamSerializer(); + + void startElement(const QName& qname); + + void startElement(const std::string& name); + + void startElement(const std::string& ns, const std::string& name); + + void endElement(); + + // Helpers for serializing elements with simple content. The first two + // functions assume that startElement() has already been called. The + // other two serialize the complete element, from start to end. + // + void element(const std::string& value); + + template + void element(const T& value); + + void element(const std::string& name, const std::string& value); + + template + void element(const std::string& name, const T& value); + + void element(const QName& qname, const std::string& value); + + template + void element(const QName& qname, const T& value); + + void element(const std::string& namespace_, const std::string& name, const std::string& value); + + template + void element(const std::string& namespace_, const std::string& name, const T& value); + + // Attributes. 
+ // + void startAttribute(const QName& qname); + + void startAttribute(const std::string& name); + + void startAttribute(const std::string& ns, const std::string& name); + + void endAttribute(); + + void attribute(const QName& qname, const std::string& value); + + template + void attribute(const QName& qname, const T& value); + + void attribute(const std::string& name, const std::string& value); + + template + void attribute(const std::string& name, const T& value); + + void attribute(const std::string& ns, const std::string& name, const std::string& value); + + template + void attribute(const std::string& ns, const std::string& name, const T& value); + + // Characters. + // + void characters(const std::string& value); + + template + void characters(const T& value); + + // Namespaces declaration. If prefix is empty, then the default + // namespace is declared. If both prefix and namespace are empty, + // then the default namespace declaration is cleared (xmlns=""). + // + void namespaceDecl(const std::string& ns, const std::string& prefix); + + // XML declaration. If encoding or standalone are not specified, + // then these attributes are omitted from the output. + // + void xmlDecl(const std::string& version = "1.0", const std::string& encoding = "UTF-8", const std::string& standalone = ""); + + // Utility functions. + // + // Return true if there is a mapping. In this case, prefix contains + // the mapped prefix. + // + bool lookupNamespacePrefix(const std::string& ns, std::string& prefix); + +private: + XMLStreamSerializer(const XMLStreamSerializer&); + XMLStreamSerializer& operator=(const XMLStreamSerializer&); + + void handleError(genxStatus); + + std::ostream& os_; + std::ostream::iostate os_state_;// Original exception state. 
+ const std::string oname_; + + genxWriter s_; + genxSender sender_; + std::size_t depth_; +}; + +inline void XMLStreamSerializer::startElement(const QName& qname) +{ + startElement(qname.namespace_(), qname.name()); +} + +inline void XMLStreamSerializer::startElement(const std::string& name) +{ + startElement(std::string(), name); +} + +inline void XMLStreamSerializer::element(const std::string& v) +{ + if (!v.empty()) + characters(v); + + endElement(); +} + +template +inline void XMLStreamSerializer::element(const T& v) +{ + element(ValueTraits < T > ::serialize(v, *this)); +} + +inline void XMLStreamSerializer::element(const std::string& n, const std::string& v) +{ + element(std::string(), n, v); +} + +template +inline void XMLStreamSerializer::element(const std::string& n, const T& v) +{ + element(n, ValueTraits < T > ::serialize(v, *this)); +} + +inline void XMLStreamSerializer::element(const QName& qn, const std::string& v) +{ + element(qn.namespace_(), qn.name(), v); +} + +template +inline void XMLStreamSerializer::element(const QName& qn, const T& v) +{ + element(qn, ValueTraits < T > ::serialize(v, *this)); +} + +template +inline void XMLStreamSerializer::element(const std::string& ns, const std::string& n, const T& v) +{ + element(ns, n, ValueTraits < T > ::serialize(v, *this)); +} + +inline void XMLStreamSerializer::startAttribute(const QName& qname) +{ + startAttribute(qname.namespace_(), qname.name()); +} + +inline void XMLStreamSerializer::startAttribute(const std::string& name) +{ + startAttribute(std::string(), name); +} + +inline void XMLStreamSerializer::attribute(const QName& qname, const std::string& value) +{ + attribute(qname.namespace_(), qname.name(), value); +} + +template +inline void XMLStreamSerializer::attribute(const QName& qname, const T& value) +{ + attribute(qname, ValueTraits < T > ::serialize(value, *this)); +} + +inline void XMLStreamSerializer::attribute(const std::string& name, const std::string& value) +{ + 
+ attribute(std::string(), name, value); +} + +template +inline void XMLStreamSerializer::attribute(const std::string& name, const T& value) +{ + attribute(name, ValueTraits < T > ::serialize(value, *this)); +} + +template +inline void XMLStreamSerializer::attribute(const std::string& ns, const std::string& name, const T& value) +{ + attribute(ns, name, ValueTraits < T > ::serialize(value, *this)); +} + +template +inline void XMLStreamSerializer::characters(const T& value) +{ + characters(ValueTraits < T > ::serialize(value, *this)); +} + +} +} + +#endif // POCO_XML_XMLSERIALIZER diff --git a/XML/include/Poco/XML/XMLStreamSerializerException.h b/XML/include/Poco/XML/XMLStreamSerializerException.h new file mode 100644 index 000000000..afbc3ca10 --- /dev/null +++ b/XML/include/Poco/XML/XMLStreamSerializerException.h @@ -0,0 +1,57 @@ +/// +/// \package metamodel +/// \file XMLStreamSerializerException.h +/// +/// \author Marian Krivos +/// \date Aug 21, 2015 - 6:52:24 PM +/// \brief type definition +/// +/// (C) Copyright 2015 R-SYS,s.r.o +/// All rights reserved. 
+/// + +#ifndef POCO_XML_XMLSTREAMSERIALIZEREXCEPTION_H_ +#define POCO_XML_XMLSTREAMSERIALIZEREXCEPTION_H_ + +#include + +namespace Poco +{ +namespace XML +{ +class XMLStreamSerializer; + +struct XML_API XMLStreamSerializerException: + public Poco::XML::XMLException +{ + virtual ~XMLStreamSerializerException() throw (); + + XMLStreamSerializerException(const std::string& name, const std::string& description); + + XMLStreamSerializerException(const XMLStreamSerializer&, const std::string& description); + + const char* name() const throw () + { + return name_.c_str(); + } + + const std::string& description() const + { + return description_; + } + + virtual const char* what() const throw (); + +private: + void init(); + +private: + std::string name_; + std::string description_; + std::string what_; +}; + +} +/* namespace XML */ +} /* namespace Poco */ +#endif /* POCO_XML_XMLSTREAMSERIALIZEREXCEPTION_H_ */ diff --git a/XML/src/QName.cpp b/XML/src/QName.cpp new file mode 100644 index 000000000..691461cc2 --- /dev/null +++ b/XML/src/QName.cpp @@ -0,0 +1,34 @@ +// file : cutl/xml/QName.cxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include "QName.h" +#include + +using namespace std; + +namespace Poco +{ +namespace XML +{ + +string QName::string() const +{ + std::string r; + if (!ns_.empty()) + { + r += ns_; + r += '#'; + } + + r += name_; + return r; +} + +ostream& operator<<(ostream& os, const QName& qn) +{ + return os << qn.string(); +} + +} +} diff --git a/XML/src/ValueTraits.cpp b/XML/src/ValueTraits.cpp new file mode 100644 index 000000000..b8337d139 --- /dev/null +++ b/XML/src/ValueTraits.cpp @@ -0,0 +1,26 @@ +// file : cutl/xml/value-traits.cxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include "XMLStreamParser.h" +#include "XMLStreamParserException.h" + +using namespace std; + +namespace Poco +{ +namespace XML +{ + +bool 
default_value_traits::parse(string s, const XMLStreamParser& p) +{ + if (s == "true" || s == "1" || s == "True" || s == "TRUE") + return true; + else if (s == "false" || s == "0" || s == "False" || s == "FALSE") + return false; + else + throw XMLStreamParserException(p, "invalid bool value '" + s + "'"); +} + +} +} diff --git a/XML/src/XMLStreamParser.cpp b/XML/src/XMLStreamParser.cpp new file mode 100644 index 000000000..887354aca --- /dev/null +++ b/XML/src/XMLStreamParser.cpp @@ -0,0 +1,882 @@ +// file : cutl/xml/XMLStreamParser.cxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include // std::bad_alloc +#include +#include // std::strchr +#include +#include +#include + +#include "XMLStreamParser.h" + +using namespace std; + +namespace Poco +{ +namespace XML +{ +// XMLStreamParser::event_type +// +static const char* parser_event_str[] = +{ "start element", "end element", "start attribute", "end attribute", "characters", "start namespace declaration", "end namespace declaration", "end of file" }; + +ostream& +operator<<(ostream& os, XMLStreamParser::EventType e) +{ + return os << parser_event_str[e]; +} + +// XMLStreamParser +// +XMLStreamParser::~XMLStreamParser() +{ + if (p_ != 0) + XML_ParserFree (p_); +} + +void XMLStreamParser::init() +{ + depth_ = 0; + state_ = state_next; + event_ = Eof; + queue_ = Eof; + + pqname_ = &qname_; + pvalue_ = &value_; + + line_ = 0; + column_ = 0; + + attr_i_ = 0; + start_ns_i_ = 0; + end_ns_i_ = 0; + + if ((feature_ & RECEIVE_ATTRIBUTE_MAP) != 0 && (feature_ & RECEIVE_ATTRIBUTES_EVENT) != 0) + feature_ &= ~RECEIVE_ATTRIBUTE_MAP; + + // Allocate the XMLStreamParser. Make sure nothing else can throw after + // this call since otherwise we will leak it. + // + p_ = XML_ParserCreateNS(0, XML_Char(' ')); + + if (p_ == 0) + throw bad_alloc(); + + // Get prefixes in addition to namespaces and local names. 
+ // + XML_SetReturnNSTriplet(p_, true); + + // Set handlers. + // + XML_SetUserData(p_, this); + + if ((feature_ & RECEIVE_ELEMENTS) != 0) + { + XML_SetStartElementHandler(p_, &start_element_); + XML_SetEndElementHandler(p_, &end_element_); + } + + if ((feature_ & RECEIVE_CHARACTERS) != 0) + XML_SetCharacterDataHandler(p_, &characters_); + + if ((feature_ & RECEIVE_NAMESPACE_DECLS) != 0) + XML_SetNamespaceDeclHandler(p_, &start_namespace_decl_, &end_namespace_decl_); +} + +void XMLStreamParser::handle_error() +{ + XML_Error e(XML_GetErrorCode (p_)); + + if (e == XML_ERROR_ABORTED) + { + // For now we only abort the XMLStreamParser in the characters_() and + // start_element_() handlers. + // + switch (content()) + { + case Content::Empty: + throw XMLStreamParserException(*this, "characters in empty content"); + case Content::Simple: + throw XMLStreamParserException(*this, "element in simple content"); + case Content::Complex: + throw XMLStreamParserException(*this, "characters in complex content"); + default: + assert(false); + } + } + else + throw XMLStreamParserException(iname_, XML_GetCurrentLineNumber(p_), XML_GetCurrentColumnNumber(p_), XML_ErrorString(e)); +} + +struct stream_exception_controller +{ + ~stream_exception_controller() + { + istream::iostate s = is_.rdstate(); + s &= ~istream::failbit; + + // If our error state (sans failbit) intersects with the + // exception state then that means we have an active + // exception and changing error/exception state will + // cause another to be thrown. + // + if (!(old_state_ & s)) + { + // Clear failbit if it was caused by eof. 
+ // + if (is_.fail() && is_.eof()) + is_.clear(s); + + is_.exceptions(old_state_); + } + } + + stream_exception_controller(istream& is) + : is_(is), old_state_(is_.exceptions()) + { + is_.exceptions(old_state_ & ~istream::failbit); + } + +private: + stream_exception_controller(const stream_exception_controller&); + + stream_exception_controller& + operator=(const stream_exception_controller&); + +private: + istream& is_; + istream::iostate old_state_; +}; + +XMLStreamParser::EventType XMLStreamParser::next() +{ + if (state_ == state_next) + return next_(false); + else + { + // If we previously peeked at start/end_element, then adjust + // state accordingly. + // + switch (event_) + { + case EndElement: + { + if (!element_state_.empty() && element_state_.back().depth == depth_) + pop_element(); + + depth_--; + break; + } + case StartElement: + { + depth_++; + break; + } + default: + break; + } + + state_ = state_next; + return event_; + } +} + +const string& XMLStreamParser::attribute(const QName& qn) const +{ + if (const ElementEntry* e = getElement()) + { + AttributeMapType::const_iterator i(e->attr_map_.find(qn)); + + if (i != e->attr_map_.end()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attr_unhandled_--; + } + return i->second.value; + } + } + + throw XMLStreamParserException(*this, "attribute '" + qn.string() + "' expected"); +} + +string XMLStreamParser::attribute(const QName& qn, const string& dv) const +{ + if (const ElementEntry* e = getElement()) + { + AttributeMapType::const_iterator i(e->attr_map_.find(qn)); + + if (i != e->attr_map_.end()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attr_unhandled_--; + } + return i->second.value; + } + } + + return dv; +} + +bool XMLStreamParser::attributePresent(const QName& qn) const +{ + if (const ElementEntry* e = getElement()) + { + AttributeMapType::const_iterator i(e->attr_map_.find(qn)); + + if (i != e->attr_map_.end()) + { + if (!i->second.handled) + { + 
i->second.handled = true; + e->attr_unhandled_--; + } + return true; + } + } + + return false; +} + +void XMLStreamParser::nextExpect(EventType e) +{ + if (next() != e) + throw XMLStreamParserException(*this, string(parser_event_str[e]) + " expected"); +} + +void XMLStreamParser::nextExpect(EventType e, const string& ns, const string& n) +{ + if (next() != e || namespace_() != ns || name() != n) + throw XMLStreamParserException(*this, string(parser_event_str[e]) + " '" + QName(ns, n).string() + "' expected"); +} + +string XMLStreamParser::element() +{ + content(Content::Simple); + string r; + + // The content of the element can be empty in which case there + // will be no characters event. + // + EventType e(next()); + if (e == Characters) + { + r.swap(value()); + e = next(); + } + + // We cannot really get anything other than end_element since + // the simple content validation won't allow it. + // + assert(e == EndElement); + + return r; +} + +string XMLStreamParser::element(const QName& qn, const string& dv) +{ + if (peek() == StartElement && qname() == qn) + { + next(); + return element(); + } + + return dv; +} + +const XMLStreamParser::ElementEntry* XMLStreamParser::get_element_() const +{ + // The start_element_() Expat handler may have already provisioned + // an entry in the element stack. In this case, we need to get the + // one before it, if any. + // + const ElementEntry* r(0); + ElementState::size_type n(element_state_.size() - 1); + + if (element_state_[n].depth == depth_) + r = &element_state_[n]; + else if (n != 0 && element_state_[n].depth > depth_) + { + n--; + if (element_state_[n].depth == depth_) + r = &element_state_[n]; + } + + return r; +} + +void XMLStreamParser::pop_element() +{ + // Make sure there are no unhandled attributes left. + // + const ElementEntry& e(element_state_.back()); + if (e.attr_unhandled_ != 0) + { + // Find the first unhandled attribute and report it. 
+ // + for (AttributeMapType::const_iterator i(e.attr_map_.begin()); i != e.attr_map_.end(); ++i) + { + if (!i->second.handled) + throw XMLStreamParserException(*this, "unexpected attribute '" + i->first.string() + "'"); + } + assert(false); + } + + element_state_.pop_back(); +} + +XMLStreamParser::EventType XMLStreamParser::next_(bool peek) +{ + EventType e(next_body()); + + // Content-specific processing. Note that we handle characters in the + // characters_() Expat handler for two reasons. Firstly, it is faster + // to ignore the whitespaces at the source. Secondly, this allows us + // to distinguish between element and attribute characters. We can + // move this processing to the handler because the characters event + // is never queued. + // + switch (e) + { + case EndElement: + { + // If this is a peek, then avoid popping the stack just yet. + // This way, the attribute map will still be valid until we + // call next(). + // + if (!peek) + { + if (!element_state_.empty() && element_state_.back().depth == depth_) + pop_element(); + + depth_--; + } + break; + } + case StartElement: + { + if (const ElementEntry* e = getElement()) + { + switch (e->content) + { + case Content::Empty: + throw XMLStreamParserException(*this, "element in empty content"); + case Content::Simple: + throw XMLStreamParserException(*this, "element in simple content"); + default: + break; + } + } + + // If this is a peek, then delay adjusting the depth. + // + if (!peek) + depth_++; + + break; + } + default: + break; + } + + return e; +} + +XMLStreamParser::EventType XMLStreamParser::next_body() +{ + // See if we have any start namespace declarations we need to return. + // + if (start_ns_i_ < start_ns_.size()) + { + // Based on the previous event determine what's the next one must be. + // + switch (event_) + { + case StartNamespaceDecl: + { + if (++start_ns_i_ == start_ns_.size()) + { + start_ns_i_ = 0; + start_ns_.clear(); + pqname_ = &qname_; + break; // No more declarations. 
+ } + // Fall through. + } + case StartElement: + { + event_ = StartNamespaceDecl; + pqname_ = &start_ns_[start_ns_i_]; + return event_; + } + default: + { + assert(false); + return event_ = Eof; + } + } + } + + // See if we have any attributes we need to return as events. + // + if (attr_i_ < attr_.size()) + { + // Based on the previous event determine what's the next one must be. + // + switch (event_) + { + case StartAttribute: + { + event_ = Characters; + pvalue_ = &attr_[attr_i_].value; + return event_; + } + case Characters: + { + event_ = EndAttribute; // Name is already set. + return event_; + } + case EndAttribute: + { + if (++attr_i_ == attr_.size()) + { + attr_i_ = 0; + attr_.clear(); + pqname_ = &qname_; + pvalue_ = &value_; + break; // No more attributes. + } + // Fall through. + } + case StartElement: + case StartNamespaceDecl: + { + event_ = StartAttribute; + pqname_ = &attr_[attr_i_].qname; + return event_; + } + default: + { + assert(false); + return event_ = Eof; + } + } + } + + // See if we have any end namespace declarations we need to return. + // + if (end_ns_i_ < end_ns_.size()) + { + // Based on the previous event determine what's the next one must be. + // + switch (event_) + { + case EndNamespaceDecl: + { + if (++end_ns_i_ == end_ns_.size()) + { + end_ns_i_ = 0; + end_ns_.clear(); + pqname_ = &qname_; + break; // No more declarations. + } + // Fall through. + } + // The end namespace declaration comes before the end element + // which means it can follow pretty much any other event. + // + default: + { + event_ = EndNamespaceDecl; + pqname_ = &end_ns_[end_ns_i_]; + return event_; + } + } + } + + // Check the queue. + // + if (queue_ != Eof) + { + event_ = queue_; + queue_ = Eof; + + line_ = XML_GetCurrentLineNumber(p_); + column_ = XML_GetCurrentColumnNumber(p_); + + return event_; + } + + // Reset the character accumulation flag. 
+ // + accumulate_ = false; + + XML_ParsingStatus ps; + XML_GetParsingStatus(p_, &ps); + + switch (ps.parsing) + { + case XML_INITIALIZED: + { + // As if we finished the previous chunk. + break; + } + case XML_PARSING: + { + assert(false); + return event_ = Eof; + } + case XML_FINISHED: + { + return event_ = Eof; + } + case XML_SUSPENDED: + { + switch (XML_ResumeParser(p_)) + { + case XML_STATUS_SUSPENDED: + { + // If the XMLStreamParser is again in the suspended state, then + // that means we have the next event. + // + return event_; + } + case XML_STATUS_OK: + { + // Otherwise, we need to get and parse the next chunk of data + // unless this was the last chunk, in which case this is eof. + // + if (ps.finalBuffer) + return event_ = Eof; + + break; + } + case XML_STATUS_ERROR: + handle_error(); + } + break; + } + } + + // Get and parse the next chunk of data until we get the next event + // or reach eof. + // + if (!accumulate_) + event_ = Eof; + + XML_Status s; + do + { + if (size_ != 0) + { + s = XML_Parse(p_, static_cast(data_.buf), static_cast(size_), true); + + if (s == XML_STATUS_ERROR) + handle_error(); + + break; + } + else + { + const size_t cap(4096); + + char* b(static_cast(XML_GetBuffer(p_, cap))); + if (b == 0) + throw bad_alloc(); + + // Temporarily unset the exception failbit. Also clear the fail bit + // when we reset the old state if it was caused by eof. + // + istream& is(*data_.is); + { + stream_exception_controller sec(is); + is.read(b, static_cast(cap)); + } + + // If the caller hasn't configured the stream to use exceptions, + // then use the parsing exception to report an error. 
+ // + if (is.bad() || (is.fail() && !is.eof())) + throw XMLStreamParserException(*this, "io failure"); + + bool eof(is.eof()); + + s = XML_ParseBuffer(p_, static_cast(is.gcount()), eof); + + if (s == XML_STATUS_ERROR) + handle_error(); + + if (eof) + break; + } + } while (s != XML_STATUS_SUSPENDED); + + return event_; +} + +static void splitName(const XML_Char* s, QName& qn) +{ + string& ns(qn.namespace_()); + string& name(qn.name()); + string& prefix(qn.prefix()); + + const char* p(strchr(s, ' ')); + + if (p == 0) + { + ns.clear(); + name = s; + prefix.clear(); + } + else + { + ns.assign(s, 0, p - s); + + s = p + 1; + p = strchr(s, ' '); + + if (p == 0) + { + name = s; + prefix.clear(); + } + else + { + name.assign(s, 0, p - s); + prefix = p + 1; + } + } +} + +void XMLCALL XMLStreamParser::start_element_(void* v, const XML_Char* name, const XML_Char** atts) +{ + XMLStreamParser& p(*static_cast(v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus(p.p_, &ps); + + // Expat has a (mis)-feature of a possibily calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + // Cannot be a followup event. + // + assert(ps.parsing == XML_PARSING); + + // When accumulating characters in simple content, we expect to + // see more characters or end element. Seeing start element is + // possible but means violation of the content model. + // + if (p.accumulate_) + { + // It would have been easier to throw the exception directly, + // however, the Expat code is most likely not exception safe. + // + p.line_ = XML_GetCurrentLineNumber(p.p_); + p.column_ = XML_GetCurrentColumnNumber(p.p_); + XML_StopParser(p.p_, false); + return; + } + + p.event_ = StartElement; + splitName(name, p.qname_); + + p.line_ = XML_GetCurrentLineNumber(p.p_); + p.column_ = XML_GetCurrentColumnNumber(p.p_); + + // Handle attributes. 
+ // + if (*atts != 0) + { + bool am((p.feature_ & RECEIVE_ATTRIBUTE_MAP) != 0); + bool ae((p.feature_ & RECEIVE_ATTRIBUTES_EVENT) != 0); + + // Provision an entry for this element. + // + ElementEntry* pe(0); + if (am) + { + p.element_state_.push_back(ElementEntry(p.depth_ + 1)); + pe = &p.element_state_.back(); + } + + if (am || ae) + { + for (; *atts != 0; atts += 2) + { + if (am) + { + QName qn; + splitName(*atts, qn); + AttributeMapType::value_type v(qn, AttributeValueType()); + v.second.value = *(atts + 1); + v.second.handled = false; + pe->attr_map_.insert(v); + } + else + { + p.attr_.push_back(attribute_type()); + splitName(*atts, p.attr_.back().qname); + p.attr_.back().value = *(atts + 1); + } + } + + if (am) + pe->attr_unhandled_ = pe->attr_map_.size(); + } + } + + XML_StopParser(p.p_, true); + } + +void XMLCALL XMLStreamParser::end_element_(void* v, const XML_Char* name) +{ + XMLStreamParser& p(*static_cast(v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus(p.p_, &ps); + + // Expat has a (mis)-feature of a possibily calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + // This can be a followup event for empty elements (). In this + // case the element name is already set. + // + if (ps.parsing != XML_PARSING) + p.queue_ = EndElement; + else + { + splitName(name, p.qname_); + + // If we are accumulating characters, then queue this event. + // + if (p.accumulate_) + p.queue_ = EndElement; + else + { + p.event_ = EndElement; + + p.line_ = XML_GetCurrentLineNumber(p.p_); + p.column_ = XML_GetCurrentColumnNumber(p.p_); + } + + XML_StopParser(p.p_, true); + } +} + + void XMLCALL XMLStreamParser::characters_(void* v, const XML_Char* s, int n) + { + XMLStreamParser& p(*static_cast(v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus(p.p_, &ps); + + // Expat has a (mis)-feature of a possibily calling handlers even + // after the non-resumable XML_StopParser call. 
+ // + if (ps.parsing == XML_FINISHED) + return; + + Content cont(p.content()); + + // If this is empty or complex content, see if these are whitespaces. + // + switch (cont) + { + case Content::Empty: + case Content::Complex: + { + for (int i(0); i != n; ++i) + { + char c(s[i]); + if (c == 0x20 || c == 0x0A || c == 0x0D || c == 0x09) + continue; + + // It would have been easier to throw the exception directly, + // however, the Expat code is most likely not exception safe. + // + p.line_ = XML_GetCurrentLineNumber(p.p_); + p.column_ = XML_GetCurrentColumnNumber(p.p_); + XML_StopParser(p.p_, false); + break; + } + return; + } + default: + break; + } + + // Append the characters if we are accumulating. This can also be a + // followup event for another character event. In this case also + // append the data. + // + if (p.accumulate_ || ps.parsing != XML_PARSING) + { + assert(p.event_ == Characters); +p.value_.append(s, n); +} +else +{ +p.event_ = Characters; +p.value_.assign(s, n); + +p.line_ = XML_GetCurrentLineNumber(p.p_); +p.column_ = XML_GetCurrentColumnNumber(p.p_); + +// In simple content we need to accumulate all the characters +// into a single event. To do this we will let the XMLStreamParser run +// until we reach the end of the element. +// +if (cont == Content::Simple) +p.accumulate_ = true; +else +XML_StopParser(p.p_, true); +} +} + +void XMLCALL XMLStreamParser::start_namespace_decl_(void* v, const XML_Char* prefix, const XML_Char* ns) +{ +XMLStreamParser& p(*static_cast(v)); + +XML_ParsingStatus ps; +XML_GetParsingStatus(p.p_, &ps); + + // Expat has a (mis)-feature of a possibily calling handlers even + // after the non-resumable XML_StopParser call. + // +if (ps.parsing == XML_FINISHED) +return; + +p.start_ns_.push_back(QName()); +p.start_ns_.back().prefix() = (prefix != 0 ? prefix : ""); +p.start_ns_.back().namespace_() = (ns != 0 ? 
ns : ""); +} + +void XMLCALL XMLStreamParser::end_namespace_decl_(void* v, const XML_Char* prefix) +{ +XMLStreamParser& p(*static_cast(v)); + +XML_ParsingStatus ps; +XML_GetParsingStatus(p.p_, &ps); + + // Expat has a (mis)-feature of possibly calling handlers even + // after the non-resumable XML_StopParser call. + // +if (ps.parsing == XML_FINISHED) +return; + +p.end_ns_.push_back(QName()); +p.end_ns_.back().prefix() = (prefix != 0 ? prefix : ""); +} +} +} diff --git a/XML/src/XMLStreamParserException.cpp b/XML/src/XMLStreamParserException.cpp new file mode 100644 index 000000000..6fe44655d --- /dev/null +++ b/XML/src/XMLStreamParserException.cpp @@ -0,0 +1,54 @@ +/// +/// \package metamodel +/// \file XMLStreamParserException.cpp +/// +/// \author Marian Krivos +/// \date Aug 21, 2015 - 6:52:24 PM +/// \brief type definition +/// +/// (C) Copyright 2015 R-SYS,s.r.o +/// All rights reserved. +/// + +#include "XMLStreamParserException.h" +#include "XMLStreamParser.h" + +using namespace std; + +namespace Poco +{ +namespace XML +{ + +XMLStreamParserException::~XMLStreamParserException() throw () +{ +} + +XMLStreamParserException::XMLStreamParserException(const string& n, Poco::UInt64 l, Poco::UInt64 c, const string& d) + : name_(n), line_(l), column_(c), description_(d) +{ + init(); +} + +XMLStreamParserException::XMLStreamParserException(const XMLStreamParser& p, const std::string& d) + : name_(p.input_name()), line_(p.line()), column_(p.column()), description_(d) +{ + init(); +} + +void XMLStreamParserException::init() +{ + std::ostringstream os; + if (!name_.empty()) + os << name_ << ':'; + os << line_ << ':' << column_ << ": error: " << description_; + what_ = os.str(); +} + +char const* XMLStreamParserException::what() const throw () +{ + return what_.c_str(); +} + +} /* namespace XML */ +} /* namespace Poco */ diff --git a/XML/src/XMLStreamSerializer.cpp b/XML/src/XMLStreamSerializer.cpp new file mode 100644 index 000000000..f6f9f52aa --- /dev/null +++ 
b/XML/src/XMLStreamSerializer.cpp @@ -0,0 +1,191 @@ +// file : xml/XMLStreamSerializer.cxx +// copyright : Copyright (c) 2013-2014 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include "XMLStreamSerializer.h" +#include "XMLStreamSerializerException.h" + +#include // std::bad_alloc +#include // std::strlen + +using namespace std; + +namespace Poco +{ +namespace XML +{ +// XMLStreamSerializer +// +extern "C" genxStatus genx_write(void* p, constUtf8 us) +{ + // It would have been easier to throw the exception directly, + // however, the Genx code is most likely not exception safe. + // + ostream* os(static_cast(p)); + const char* s(reinterpret_cast(us)); + os->write(s, static_cast(strlen(s))); + return os->good() ? GENX_SUCCESS : GENX_IO_ERROR; +} + +extern "C" genxStatus genx_write_bound(void* p, constUtf8 start, constUtf8 end) +{ + ostream* os(static_cast(p)); + const char* s(reinterpret_cast(start)); + streamsize n(static_cast(end - start)); + os->write(s, n); + return os->good() ? GENX_SUCCESS : GENX_IO_ERROR; +} + +extern "C" genxStatus genx_flush(void* p) +{ + ostream* os(static_cast(p)); + os->flush(); + return os->good() ? GENX_SUCCESS : GENX_IO_ERROR; +} + +XMLStreamSerializer::~XMLStreamSerializer() +{ + if (s_ != 0) + genxDispose (s_); +} + +XMLStreamSerializer::XMLStreamSerializer(ostream& os, const string& oname, unsigned short ind) + : os_(os), os_state_(os.exceptions()), oname_(oname), depth_(0) +{ + // Temporarily disable exceptions on the stream. + // + os_.exceptions(ostream::goodbit); + + // Allocate the XMLStreamSerializer. Make sure nothing else can throw after + // this call since otherwise we will leak it. 
+ // + s_ = genxNew(0, 0, 0); + + if (s_ == 0) + throw bad_alloc(); + + genxSetUserData(s_, &os_); + + if (ind != 0) + genxSetPrettyPrint(s_, ind); + + sender_.send = &genx_write; + sender_.sendBounded = &genx_write_bound; + sender_.flush = &genx_flush; + + if (genxStatus e = genxStartDocSender(s_, &sender_)) + { + string m(genxGetErrorMessage(s_, e)); + genxDispose (s_); + throw XMLStreamSerializerException(oname, m); + } +} + +void XMLStreamSerializer::handleError(genxStatus e) +{ + switch (e) + { + case GENX_ALLOC_FAILED: + throw bad_alloc(); + case GENX_IO_ERROR: + // Restoring the original exception state should trigger the + // exception. If it doesn't (e.g., because the user didn't + // configure the stream to throw), then fall back to the + // serialization exception. + // + os_.exceptions(os_state_); + // Fall through. + default: + throw XMLStreamSerializerException(oname_, genxGetErrorMessage(s_, e)); + } +} + +void XMLStreamSerializer::startElement(const string& ns, const string& name) +{ + if (genxStatus e = genxStartElementLiteral(s_, reinterpret_cast(ns.empty() ? 0 : ns.c_str()), reinterpret_cast(name.c_str()))) + handleError(e); + + depth_++; +} + +void XMLStreamSerializer::endElement() +{ + if (genxStatus e = genxEndElement(s_)) + handleError(e); + + // Call EndDocument() if we are past the root element. + // + if (--depth_ == 0) + { + if (genxStatus e = genxEndDocument(s_)) + handleError(e); + + // Also restore the original exception state on the stream. + // + os_.exceptions(os_state_); + } +} + +void XMLStreamSerializer::element(const string& ns, const string& n, const string& v) +{ + startElement(ns, n); + element(v); +} + +void XMLStreamSerializer::startAttribute(const string& ns, const string& name) +{ + if (genxStatus e = genxStartAttributeLiteral(s_, reinterpret_cast(ns.empty() ? 
0 : ns.c_str()), reinterpret_cast(name.c_str()))) + handleError(e); +} + +void XMLStreamSerializer::endAttribute() +{ + if (genxStatus e = genxEndAttribute(s_)) + handleError(e); +} + +void XMLStreamSerializer::attribute(const string& ns, const string& name, const string& value) +{ + if (genxStatus e = genxAddAttributeLiteral(s_, reinterpret_cast(ns.empty() ? 0 : ns.c_str()), reinterpret_cast(name.c_str()), + reinterpret_cast(value.c_str()))) + handleError(e); +} + +void XMLStreamSerializer::characters(const string& value) +{ + if (genxStatus e = genxAddCountedText(s_, reinterpret_cast(value.c_str()), value.size())) + handleError(e); +} + +void XMLStreamSerializer::namespaceDecl(const string& ns, const string& p) +{ + if (genxStatus e = + ns.empty() && p.empty() ? + genxUnsetDefaultNamespace(s_) : + genxAddNamespaceLiteral(s_, reinterpret_cast(ns.c_str()), reinterpret_cast(p.c_str()))) + handleError(e); +} + +void XMLStreamSerializer::xmlDecl(const string& ver, const string& enc, const string& stl) +{ + if (genxStatus e = genxXmlDeclaration(s_, reinterpret_cast(ver.c_str()), (enc.empty() ? 0 : reinterpret_cast(enc.c_str())), + (stl.empty() ? 0 : reinterpret_cast(stl.c_str())))) + handleError(e); +} + +bool XMLStreamSerializer::lookupNamespacePrefix(const string& ns, string& p) +{ + // Currently Genx will create a namespace mapping if one doesn't + // already exist. 
+ // + genxStatus e; + genxNamespace gns(genxDeclareNamespace(s_, reinterpret_cast(ns.c_str()), 0, &e)); + + if (e != GENX_SUCCESS) + handleError(e); + + p = reinterpret_cast(genxGetNamespacePrefix(gns)); + return true; +} +} +} diff --git a/XML/src/XMLStreamSerializerException.cpp b/XML/src/XMLStreamSerializerException.cpp new file mode 100644 index 000000000..c009bbe0e --- /dev/null +++ b/XML/src/XMLStreamSerializerException.cpp @@ -0,0 +1,59 @@ +/// +/// \package metamodel +/// \file XMLStreamSerializerException.cpp +/// +/// \author Marian Krivos +/// \date Aug 21, 2015 - 6:52:24 PM +/// \brief type definition +/// +/// (C) Copyright 2015 R-SYS,s.r.o +/// All rights reserved. +/// + +#include "XMLStreamParserException.h" +#include "XMLStreamSerializer.h" + +using namespace std; + +namespace Poco +{ +namespace XML +{ + +// XMLStreamSerializerException +// +XMLStreamSerializerException::~XMLStreamSerializerException() throw () +{ +} + +XMLStreamSerializerException::XMLStreamSerializerException(const string& n, const string& d) + : name_(n), description_(d) +{ + init(); +} + +XMLStreamSerializerException::XMLStreamSerializerException(const XMLStreamSerializer& s, const std::string& d) + : name_(s.outputName()), description_(d) +{ + init(); +} + +void XMLStreamSerializerException::init() +{ + if (!name_.empty()) + { + what_ += name_; + what_ += ": "; + } + + what_ += "error: "; + what_ += description_; +} + +char const* XMLStreamSerializerException::what() const throw () +{ + return what_.c_str(); +} + +} /* namespace XML */ +} /* namespace Poco */ diff --git a/XML/src/char-props.c b/XML/src/char-props.c new file mode 100644 index 000000000..1e3b79f12 --- /dev/null +++ b/XML/src/char-props.c @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2007-2013 Code Synthesis Tools CC. + * Copyright (c) 2004 by Tim Bray and Sun Microsystems. + * + * For copying permission, see the accompanying COPYING file. + */ + +/* + * Construct character-properties tables for genx. 
+ * Quite likely there's a better way. + * This version is generated semi-automatically from the source code of the + * XML specification via emacs global replace and keyboard macros + */ +#include "genx.h" + +static void charProp(char * p, int c, int prop) +{ + p[c] |= prop; +} + +static void rangeProp(char * p, size_t start, size_t end, int prop) +{ + size_t i; + for (i = start; i <= end; i++) + p[i] |= prop; +} + +void genxSetCharProps(char * p) +{ + size_t i; + + for (i = 0; i < GENX_CHAR_TABLE_SIZE; i++) + p[i] = 0; + + /* per XML 1.0 */ + charProp(p, 0x9, GENX_XML_CHAR); + charProp(p, 0xa, GENX_XML_CHAR); + charProp(p, 0xd, GENX_XML_CHAR); + rangeProp(p, 0x20, 0xff, GENX_XML_CHAR); + +#if GENX_CHAR_TABLE_SIZE == 0x10000 + rangeProp(p, 0x0100, 0xd7ff, GENX_XML_CHAR); + rangeProp(p, 0xe000, 0xfffd, GENX_XML_CHAR); +#endif + + /* Letter ::= BaseChar | Ideographic */ + rangeProp(p, 0x0041, 0x005A, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0061, 0x007A, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x00C0, 0x00D6, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x00D8, 0x00F6, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x00F8, 0x00FF, GENX_LETTER|GENX_NAMECHAR); + +#if GENX_CHAR_TABLE_SIZE == 0x10000 + + rangeProp(p, 0x0100, 0x0131, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0134, 0x013E, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0141, 0x0148, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x014A, 0x017E, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0180, 0x01C3, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x01CD, 0x01F0, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x01F4, 0x01F5, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x01FA, 0x0217, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0250, 0x02A8, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x02BB, 0x02C1, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0386, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0388, 0x038A, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x038C, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x038E, 0x03A1, 
GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x03A3, 0x03CE, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x03D0, 0x03D6, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x03DA, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x03DC, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x03DE, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x03E0, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x03E2, 0x03F3, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0401, 0x040C, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x040E, 0x044F, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0451, 0x045C, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x045E, 0x0481, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0490, 0x04C4, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x04C7, 0x04C8, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x04CB, 0x04CC, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x04D0, 0x04EB, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x04EE, 0x04F5, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x04F8, 0x04F9, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0531, 0x0556, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0559, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0561, 0x0586, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x05D0, 0x05EA, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x05F0, 0x05F2, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0621, 0x063A, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0641, 0x064A, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0671, 0x06B7, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x06BA, 0x06BE, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x06C0, 0x06CE, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x06D0, 0x06D3, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x06D5, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x06E5, 0x06E6, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0905, 0x0939, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x093D, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0958, 0x0961, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0985, 0x098C, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x098F, 
0x0990, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0993, 0x09A8, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x09AA, 0x09B0, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x09B2, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x09B6, 0x09B9, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x09DC, 0x09DD, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x09DF, 0x09E1, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x09F0, 0x09F1, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0A05, 0x0A0A, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0A0F, 0x0A10, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0A13, 0x0A28, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0A2A, 0x0A30, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0A32, 0x0A33, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0A35, 0x0A36, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0A38, 0x0A39, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0A59, 0x0A5C, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0A5E, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0A72, 0x0A74, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0A85, 0x0A8B, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0A8D, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0A8F, 0x0A91, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0A93, 0x0AA8, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0AAA, 0x0AB0, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0AB2, 0x0AB3, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0AB5, 0x0AB9, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0ABD, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0AE0, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0B05, 0x0B0C, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0B0F, 0x0B10, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0B13, 0x0B28, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0B2A, 0x0B30, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0B32, 0x0B33, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0B36, 0x0B39, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0B3D, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0B5C, 0x0B5D, GENX_LETTER|GENX_NAMECHAR); + 
rangeProp(p, 0x0B5F, 0x0B61, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0B85, 0x0B8A, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0B8E, 0x0B90, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0B92, 0x0B95, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0B99, 0x0B9A, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0B9C, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0B9E, 0x0B9F, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0BA3, 0x0BA4, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0BA8, 0x0BAA, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0BAE, 0x0BB5, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0BB7, 0x0BB9, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0C05, 0x0C0C, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0C0E, 0x0C10, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0C12, 0x0C28, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0C2A, 0x0C33, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0C35, 0x0C39, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0C60, 0x0C61, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0C85, 0x0C8C, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0C8E, 0x0C90, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0C92, 0x0CA8, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0CAA, 0x0CB3, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0CB5, 0x0CB9, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0CDE, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0CE0, 0x0CE1, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0D05, 0x0D0C, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0D0E, 0x0D10, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0D12, 0x0D28, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0D2A, 0x0D39, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0D60, 0x0D61, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0E01, 0x0E2E, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0E30, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0E32, 0x0E33, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0E40, 0x0E45, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0E81, 0x0E82, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0E84, 
GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0E87, 0x0E88, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0E8A, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0E8D, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0E94, 0x0E97, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0E99, 0x0E9F, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0EA1, 0x0EA3, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0EA5, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0EA7, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0EAA, 0x0EAB, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0EAD, 0x0EAE, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0EB0, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0EB2, 0x0EB3, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0EBD, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0EC0, 0x0EC4, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0F40, 0x0F47, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0F49, 0x0F69, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x10A0, 0x10C5, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x10D0, 0x10F6, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x1100, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1102, 0x1103, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1105, 0x1107, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x1109, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x110B, 0x110C, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x110E, 0x1112, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x113C, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x113E, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x1140, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x114C, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x114E, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x1150, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1154, 0x1155, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x1159, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x115F, 0x1161, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x1163, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x1165, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x1167, GENX_LETTER|GENX_NAMECHAR); + 
charProp(p, 0x1169, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x116D, 0x116E, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1172, 0x1173, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x1175, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x119E, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x11A8, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x11AB, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x11AE, 0x11AF, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x11B7, 0x11B8, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x11BA, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x11BC, 0x11C2, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x11EB, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x11F0, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x11F9, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1E00, 0x1E9B, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1EA0, 0x1EF9, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1F00, 0x1F15, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1F18, 0x1F1D, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1F20, 0x1F45, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1F48, 0x1F4D, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1F50, 0x1F57, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x1F59, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x1F5B, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x1F5D, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1F5F, 0x1F7D, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1F80, 0x1FB4, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1FB6, 0x1FBC, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x1FBE, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1FC2, 0x1FC4, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1FC6, 0x1FCC, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1FD0, 0x1FD3, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1FD6, 0x1FDB, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1FE0, 0x1FEC, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1FF2, 0x1FF4, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x1FF6, 0x1FFC, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x2126, GENX_LETTER|GENX_NAMECHAR); + 
rangeProp(p, 0x212A, 0x212B, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x212E, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x2180, 0x2182, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x3041, 0x3094, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x30A1, 0x30FA, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x3105, 0x312C, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0xAC00, 0xD7A3, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x4E00, 0x9FA5, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x3007, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x3021, 0x3029, GENX_LETTER|GENX_NAMECHAR); + +#endif /* GENX_CHAR_TABLE_SIZE == 0x10000 */ + + /* + * NameChar ::= + * Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender + */ + + charProp(p, '.', GENX_NAMECHAR); + charProp(p, '-', GENX_NAMECHAR); + charProp(p, '_', GENX_NAMECHAR); + + rangeProp(p, 0x0030, 0x0039, GENX_NAMECHAR); + charProp(p, 0x00B7, GENX_LETTER|GENX_NAMECHAR); + +#if GENX_CHAR_TABLE_SIZE == 0x10000 + + rangeProp(p, 0x0660, 0x0669, GENX_NAMECHAR); + rangeProp(p, 0x06F0, 0x06F9, GENX_NAMECHAR); + rangeProp(p, 0x0966, 0x096F, GENX_NAMECHAR); + rangeProp(p, 0x09E6, 0x09EF, GENX_NAMECHAR); + rangeProp(p, 0x0A66, 0x0A6F, GENX_NAMECHAR); + rangeProp(p, 0x0AE6, 0x0AEF, GENX_NAMECHAR); + rangeProp(p, 0x0B66, 0x0B6F, GENX_NAMECHAR); + rangeProp(p, 0x0BE7, 0x0BEF, GENX_NAMECHAR); + rangeProp(p, 0x0C66, 0x0C6F, GENX_NAMECHAR); + rangeProp(p, 0x0CE6, 0x0CEF, GENX_NAMECHAR); + rangeProp(p, 0x0D66, 0x0D6F, GENX_NAMECHAR); + rangeProp(p, 0x0E50, 0x0E59, GENX_NAMECHAR); + rangeProp(p, 0x0ED0, 0x0ED9, GENX_NAMECHAR); + rangeProp(p, 0x0F20, 0x0F29, GENX_NAMECHAR); + rangeProp(p, 0x0300, 0x0345, GENX_NAMECHAR); + rangeProp(p, 0x0360, 0x0361, GENX_NAMECHAR); + rangeProp(p, 0x0483, 0x0486, GENX_NAMECHAR); + rangeProp(p, 0x0591, 0x05A1, GENX_NAMECHAR); + rangeProp(p, 0x05A3, 0x05B9, GENX_NAMECHAR); + rangeProp(p, 0x05BB, 0x05BD, GENX_NAMECHAR); + charProp(p, 0x05BF, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x05C1, 0x05C2, 
GENX_NAMECHAR); + charProp(p, 0x05C4, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x064B, 0x0652, GENX_NAMECHAR); + charProp(p, 0x0670, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x06D6, 0x06DC, GENX_NAMECHAR); + rangeProp(p, 0x06DD, 0x06DF, GENX_NAMECHAR); + rangeProp(p, 0x06E0, 0x06E4, GENX_NAMECHAR); + rangeProp(p, 0x06E7, 0x06E8, GENX_NAMECHAR); + rangeProp(p, 0x06EA, 0x06ED, GENX_NAMECHAR); + rangeProp(p, 0x0901, 0x0903, GENX_NAMECHAR); + charProp(p, 0x093C, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x093E, 0x094C, GENX_NAMECHAR); + charProp(p, 0x094D, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0951, 0x0954, GENX_NAMECHAR); + rangeProp(p, 0x0962, 0x0963, GENX_NAMECHAR); + rangeProp(p, 0x0981, 0x0983, GENX_NAMECHAR); + charProp(p, 0x09BC, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x09BE, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x09BF, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x09C0, 0x09C4, GENX_NAMECHAR); + rangeProp(p, 0x09C7, 0x09C8, GENX_NAMECHAR); + rangeProp(p, 0x09CB, 0x09CD, GENX_NAMECHAR); + charProp(p, 0x09D7, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x09E2, 0x09E3, GENX_NAMECHAR); + charProp(p, 0x0A02, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0A3C, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0A3E, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0A3F, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0A40, 0x0A42, GENX_NAMECHAR); + rangeProp(p, 0x0A47, 0x0A48, GENX_NAMECHAR); + rangeProp(p, 0x0A4B, 0x0A4D, GENX_NAMECHAR); + rangeProp(p, 0x0A70, 0x0A71, GENX_NAMECHAR); + rangeProp(p, 0x0A81, 0x0A83, GENX_NAMECHAR); + charProp(p, 0x0ABC, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0ABE, 0x0AC5, GENX_NAMECHAR); + rangeProp(p, 0x0AC7, 0x0AC9, GENX_NAMECHAR); + rangeProp(p, 0x0ACB, 0x0ACD, GENX_NAMECHAR); + rangeProp(p, 0x0B01, 0x0B03, GENX_NAMECHAR); + charProp(p, 0x0B3C, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0B3E, 0x0B43, GENX_NAMECHAR); + rangeProp(p, 0x0B47, 0x0B48, GENX_NAMECHAR); + rangeProp(p, 0x0B4B, 0x0B4D, GENX_NAMECHAR); + rangeProp(p, 
0x0B56, 0x0B57, GENX_NAMECHAR); + rangeProp(p, 0x0B82, 0x0B83, GENX_NAMECHAR); + rangeProp(p, 0x0BBE, 0x0BC2, GENX_NAMECHAR); + rangeProp(p, 0x0BC6, 0x0BC8, GENX_NAMECHAR); + rangeProp(p, 0x0BCA, 0x0BCD, GENX_NAMECHAR); + charProp(p, 0x0BD7, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0C01, 0x0C03, GENX_NAMECHAR); + rangeProp(p, 0x0C3E, 0x0C44, GENX_NAMECHAR); + rangeProp(p, 0x0C46, 0x0C48, GENX_NAMECHAR); + rangeProp(p, 0x0C4A, 0x0C4D, GENX_NAMECHAR); + rangeProp(p, 0x0C55, 0x0C56, GENX_NAMECHAR); + rangeProp(p, 0x0C82, 0x0C83, GENX_NAMECHAR); + rangeProp(p, 0x0CBE, 0x0CC4, GENX_NAMECHAR); + rangeProp(p, 0x0CC6, 0x0CC8, GENX_NAMECHAR); + rangeProp(p, 0x0CCA, 0x0CCD, GENX_NAMECHAR); + rangeProp(p, 0x0CD5, 0x0CD6, GENX_NAMECHAR); + rangeProp(p, 0x0D02, 0x0D03, GENX_NAMECHAR); + rangeProp(p, 0x0D3E, 0x0D43, GENX_NAMECHAR); + rangeProp(p, 0x0D46, 0x0D48, GENX_NAMECHAR); + rangeProp(p, 0x0D4A, 0x0D4D, GENX_NAMECHAR); + charProp(p, 0x0D57, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0E31, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0E34, 0x0E3A, GENX_NAMECHAR); + rangeProp(p, 0x0E47, 0x0E4E, GENX_NAMECHAR); + charProp(p, 0x0EB1, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0EB4, 0x0EB9, GENX_NAMECHAR); + rangeProp(p, 0x0EBB, 0x0EBC, GENX_NAMECHAR); + rangeProp(p, 0x0EC8, 0x0ECD, GENX_NAMECHAR); + rangeProp(p, 0x0F18, 0x0F19, GENX_NAMECHAR); + charProp(p, 0x0F35, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0F37, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0F39, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0F3E, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0F3F, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0F71, 0x0F84, GENX_NAMECHAR); + rangeProp(p, 0x0F86, 0x0F8B, GENX_NAMECHAR); + rangeProp(p, 0x0F90, 0x0F95, GENX_NAMECHAR); + charProp(p, 0x0F97, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x0F99, 0x0FAD, GENX_NAMECHAR); + rangeProp(p, 0x0FB1, 0x0FB7, GENX_NAMECHAR); + charProp(p, 0x0FB9, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x20D0, 0x20DC, GENX_NAMECHAR); + 
charProp(p, 0x20E1, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x302A, 0x302F, GENX_NAMECHAR); + charProp(p, 0x3099, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x309A, GENX_LETTER|GENX_NAMECHAR); + + charProp(p, 0x02D0, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x02D1, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0387, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0640, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0E46, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x0EC6, GENX_LETTER|GENX_NAMECHAR); + charProp(p, 0x3005, GENX_LETTER|GENX_NAMECHAR); + rangeProp(p, 0x3031, 0x3035, GENX_NAMECHAR); + rangeProp(p, 0x309D, 0x309E, GENX_NAMECHAR); + rangeProp(p, 0x30FC, 0x30FE, GENX_NAMECHAR); + +#endif /* GENX_CHAR_TABLE_SIZE == 0x10000 */ +} diff --git a/XML/src/genx.c b/XML/src/genx.c new file mode 100644 index 000000000..3b2f90502 --- /dev/null +++ b/XML/src/genx.c @@ -0,0 +1,2328 @@ +/* + * Copyright (c) 2007-2013 Code Synthesis Tools CC. + * Copyright (c) 2004 by Tim Bray and Sun Microsystems. + * + * For copying permission, see the accompanying COPYING file. 
+ */ + +#define GENX_VERSION "cs-1" + +#include +#include +#include + +#include "genx.h" + +#define Boolean int +#define True 1 +#define False 0 +#define STRLEN_XMLNS_COLON 6 + + +/******************************* + * writer state + */ +typedef enum +{ + SEQUENCE_NO_DOC, + SEQUENCE_PRE_DOC, + SEQUENCE_POST_DOC, + SEQUENCE_START_TAG, + SEQUENCE_ATTRIBUTES, + SEQUENCE_START_ATTR, + SEQUENCE_CONTENT +} writerSequence; + +/******************************* + * generic pointer list + */ +typedef struct +{ + genxWriter writer; + size_t count; + size_t space; + void * * pointers; +} plist; + +/******************************* + * text collector, for attribute values + */ +typedef struct +{ + utf8 buf; + size_t used; + size_t space; +} collector; + +/******************************* + * Structs with opaquely-exposed handles + */ + +/* + * This one's tricky, to handle stacking namespaces + * 'declaration' is the current attribute which would be used to + * declare the currently-effective prefix + * 'defDeclaration' is a appropriate declaration when this is being + * used with the default prefix as passed to genxDeclareNamespace + * baroque is true if this namespace has been used with more than one + * prefix, or is the default namespace but has been unset + */ +struct genxNamespace_rec +{ + genxWriter writer; + utf8 name; + size_t declCount; + Boolean baroque; + genxAttribute declaration; + genxAttribute defaultDecl; +}; + +struct genxElement_rec +{ + genxWriter writer; + utf8 type; + genxNamespace ns; +}; + +typedef enum +{ + ATTR_NSDECL, + ATTR_NAKED, + ATTR_PREFIXED +} attrType; + +struct genxAttribute_rec +{ + genxWriter writer; + utf8 name; + genxNamespace ns; + collector value; + int provided; /* provided for current element? */ + attrType atype; + genxAttribute next; /* Attribute order chain if not canonical. 
*/ +}; + +/******************************* + * genx's sandbox + */ +struct genxWriter_rec +{ + genxSender * sender; + genxStatus status; + writerSequence sequence; + char xmlChars[GENX_CHAR_TABLE_SIZE]; + void * userData; + int nextPrefix; + utf8 empty; + Boolean defaultNsDeclared; + genxAttribute xmlnsEquals; + genxElement nowStarting; + genxAttribute nowStartingAttr; + plist namespaces; + plist elements; + plist attributes; + plist prefixes; + plist stack; + struct genxAttribute_rec arec; /* Dummy attribute used for lookup. */ + char * etext[100]; + genxAlloc alloc; + genxDealloc dealloc; + + /* Pretty-printing state */ + int ppIndent; + int ppDepth; + Boolean ppSimple; + + /* Canonicalization. */ + Boolean canonical; + + /* Attrbute order when not canonical. */ + genxAttribute firstAttribute; + genxAttribute lastAttribute; +}; + +/******************************* + * Forward declarations + */ +static genxAttribute declareAttribute(genxWriter w, genxNamespace ns, + constUtf8 name, constUtf8 valuestr, + genxStatus * statusP); +static genxStatus addNamespace(genxNamespace ns, constUtf8 prefix); +static genxStatus unsetDefaultNamespace(genxWriter w); +static genxStatus addAttribute(genxAttribute a, constUtf8 valuestr); +void genxSetCharProps(char * p); + +/******************************* + * End of declarations + */ + +/******************************* + * private memory utilities + */ +static void * allocate(genxWriter w, size_t bytes) +{ + if (w->alloc) + return (void *) (*w->alloc)(w->userData, bytes); + else + return (void *) malloc(bytes); +} + +static void deallocate(genxWriter w, void * data) +{ + if (w->dealloc) + (*w->dealloc)(w->userData, data); + else if (w->alloc == NULL) + free(data); +} + +static utf8 copy(genxWriter w, constUtf8 from) +{ + utf8 temp; + + if ((temp = (utf8) allocate(w, strlen((const char *) from) + 1)) == NULL) + return NULL; + strcpy((char *) temp, (const char *) from); + return temp; +} + +static genxStatus initCollector(genxWriter w, 
collector * c) +{ + c->space = 100; + if ((c->buf = (utf8) allocate(w, c->space)) == NULL) + return GENX_ALLOC_FAILED; + c->used = 0; + return GENX_SUCCESS; +} + +static genxStatus growCollector(genxWriter w, collector * c, size_t size) +{ + utf8 newSpace; + + c->space = size * 2; + if ((newSpace = (utf8) allocate(w, c->space)) == NULL) + return GENX_ALLOC_FAILED; + + strncpy((char *) newSpace, (const char *) c->buf, c->used); + newSpace[c->used] = 0; + deallocate(w, c->buf); + c->buf = newSpace; + return GENX_SUCCESS; +} + +static void startCollect(collector * c) +{ + c->used = 0; +} +static void endCollect(collector * c) +{ + c->buf[c->used] = 0; +} + +static genxStatus collectString(genxWriter w, collector * c, constUtf8 string) +{ + size_t sl = strlen((const char *) string); + + if (sl >= c->space) + if ((w->status = growCollector(w, c, sl)) != GENX_SUCCESS) + return GENX_ALLOC_FAILED; + + strcpy((char *) c->buf, (const char *) string); + return GENX_SUCCESS; +} + +#define collectPiece(w,c,d,size) {if (((c)->used+(size))>=(c)->space){if (((w)->status=growCollector(w,c,(c)->used+(size)))!=GENX_SUCCESS) return (w)->status;}strncpy((char *)(c)->buf+(c)->used,d,size);(c)->used+=size;} + +/******************************* + * private list utilities + */ +static genxStatus initPlist(genxWriter w, plist * pl) +{ + pl->writer = w; + pl->count = 0; + pl->space = 10; + pl->pointers = (void * *) allocate(w, pl->space * sizeof(void *)); + if (pl->pointers == NULL) + return GENX_ALLOC_FAILED; + + return GENX_SUCCESS; +} + +/* + * make room in a plist + */ +static Boolean checkExpand(plist * pl) +{ + void * * newlist; + size_t i; + + if (pl->count < pl->space) + return True; + + pl->space *= 2; + newlist = (void * *) allocate(pl->writer, pl->space * sizeof(void *)); + if (newlist == NULL) + return False; + for (i = 0; i < pl->count; i++) + newlist[i] = pl->pointers[i]; + deallocate(pl->writer, pl->pointers); + pl->pointers = newlist; + + return True; +} + +/* + * stick 
something on the end of a plist + */ +static genxStatus listAppend(plist * pl, void * pointer) +{ + if (!checkExpand(pl)) + return GENX_ALLOC_FAILED; + + pl->pointers[pl->count++] = pointer; + return GENX_SUCCESS; +} + +/* + * insert in place, shuffling up + */ +static genxStatus listInsert(plist * pl, void * pointer, size_t at) +{ + size_t i; + + if (!checkExpand(pl)) + return GENX_ALLOC_FAILED; + + for (i = pl->count; i > at; i--) + pl->pointers[i] = pl->pointers[i - 1]; + pl->count++; + + pl->pointers[at] = pointer; + return GENX_SUCCESS; +} + +/******************************* + * list lookups + */ + +static genxNamespace findNamespace(plist * pl, constUtf8 uri) +{ + size_t i; + genxNamespace * nn = (genxNamespace *) pl->pointers; + + for (i = 0; i < pl->count; i++) + if (strcmp((char *) uri, (const char *) nn[i]->name) == 0) + return nn[i]; + + return NULL; +} + +static genxElement findElement(plist * pl, constUtf8 xmlns, constUtf8 type) +{ + size_t i; + genxElement * ee = (genxElement *) pl->pointers; + + for (i = 0; i < pl->count; i++) + { + if (xmlns == NULL) + { + if (ee[i]->ns == NULL && strcmp((const char *) type, + (const char *) ee[i]->type) == 0) + return ee[i]; + } + else + { + if (ee[i]->ns != NULL && + strcmp((const char *) xmlns, (const char *) ee[i]->ns->name) == 0 && + strcmp((const char *) type, (const char *) ee[i]->type) == 0) + return ee[i]; + } + } + + return NULL; +} + +/* + * store & intern a prefix, after giving it the + * "xmlns:" prefix. Don't allow storing the same one twice unless 'force' + * is set. 
+ */ +static utf8 storePrefix(genxWriter w, constUtf8 prefix, Boolean force) +{ + int high, low; + utf8 * pp = (utf8 *) w->prefixes.pointers; + unsigned char buf[1024]; + + if (prefix[0] == 0) + prefix = (utf8) "xmlns"; + else + { + sprintf((char *) buf, "xmlns:%s", prefix); + prefix = buf; + } + + high = (int) w->prefixes.count; + low = -1; + while (high - low > 1) + { + int probe = (high + low) / 2; + if (strcmp((const char *) prefix, (const char *) pp[probe]) < 0) + high = probe; + else + low = probe; + } + + /* already there? */ + if (low != -1 && strcmp((const char *) prefix, (const char *) pp[low]) == 0) + { + if (force) + return pp[low]; + + w->status = GENX_DUPLICATE_PREFIX; + return NULL; + } + + /* copy & insert */ + if ((prefix = copy(w, prefix)) == NULL) + { + w->status = GENX_ALLOC_FAILED; + return NULL; + } + + w->status = listInsert(&w->prefixes, (void *) prefix, (size_t) high); + if (w->status != GENX_SUCCESS) + return NULL; + + return (utf8) prefix; +} + +/******************************* + * UTF8 bit-banging + */ + +/* + * Retrieve the character pointed at, and advance the pointer; return -1 on + * error + */ +int genxNextUnicodeChar(constUtf8 * sp) +{ + utf8 s = (utf8) *sp; + int c; + + if (*s == 0) + return -1; + + if (*s < 0x80) + c = *s++; + + /* all this encoding sanity-checking taken from section 3.10 of Unicode 4 */ + else if (*s < 0xc2) + goto malformed; + + /* 2-byte encodings, first byte c2 .. df */ + else if (*s < 0xe0) + { + c = (*s++ & 0x1f) << 6; + + /* + * for this common idiom, if ((c & 0xc0) != 0x80) is slightly faster + * on MacOS (PPC) + */ + if (*s < 0x80 || *s > 0xbf) + goto malformed; + + c |= *s++ & 0x3f; + } + + /* 3-byte encodings, first byte e0 .. 
ef */ + else if (*s < 0xf0) + { + int b0 = *s; + c = (*s++ & 0x0f) << 12; + + if ((b0 == 0xe0 && (*s < 0xa0 || *s > 0xbf)) || + (b0 < 0xed && (*s < 0x80 || *s > 0xbf)) || + (b0 == 0xed && (*s < 0x80 || *s > 0x9f)) || + (b0 > 0xed && (*s < 0x80 || *s > 0xbf))) + goto malformed; + + c |= (*s++ & 0x3f) << 6; + + if (*s < 0x80 || *s > 0xbf) + goto malformed; + + c |= *s++ & 0x3f; + } + + /* 4-byte encodings, first byte f0 .. f4 */ + else if (*s < 0xf5) + { + int b0 = *s; + c = (*s++ & 0x07) << 18; + + if ((b0 == 0xf0 && (*s < 0x90 || *s > 0xbf)) || + (b0 < 0xf4 && (*s < 0x80 || *s > 0xbf)) || + (b0 >= 0xf4 && (*s < 0x80 || *s > 0x8f))) + goto malformed; + + c |= (*s++ & 0x3f) << 12; + + if (*s < 0x80 || *s > 0xbf) + goto malformed; + + c |= (*s++ & 0x3f) << 6; + + if (*s < 0x80 || *s > 0xbf) + goto malformed; + + c |= *s++ & 0x3f; + } + else + goto malformed; + + *sp = s; + return c; + + /* + * this is needed by scrubText, which wants to get the pointer moved + * past the problem area. + */ +malformed: + if (*s) + ++s; + *sp = s; + return -1; +} + +static Boolean isXMLChar(genxWriter w, int c) +{ + if (c < 0) + return False; + else if (c < GENX_CHAR_TABLE_SIZE) + return (int) w->xmlChars[c]; + else + return (c <= 0x10ffff); +} + +static Boolean isLetter(genxWriter w, int c) +{ + if (c < 0 || c > 0xffff) + return False; + else + { +#if GENX_CHAR_TABLE_SIZE == 0x10000 + return w->xmlChars[c] & GENX_LETTER; +#else + return c < GENX_CHAR_TABLE_SIZE ? (w->xmlChars[c] & GENX_LETTER) : True; +#endif + } +} + +static Boolean isNameChar(genxWriter w, int c) +{ + if (c < 0 || c > 0xffff) + return False; + else + { +#if GENX_CHAR_TABLE_SIZE == 0x10000 + return w->xmlChars[c] & GENX_NAMECHAR; +#else + return c < GENX_CHAR_TABLE_SIZE ? 
(w->xmlChars[c] & GENX_NAMECHAR) : True; +#endif + } +} + +/******************************* + * Constructors, setters/getters + */ + +/* + * Construct a new genxWriter + */ +genxWriter genxNew(genxAlloc alloc, genxDealloc dealloc, void * userData) +{ + genxWriter w; + genxNamespace xml; + + if (alloc) + w = (genxWriter) (*alloc)(userData, sizeof(struct genxWriter_rec)); + else + w = (genxWriter) malloc(sizeof(struct genxWriter_rec)); + + if (w == NULL) + return NULL; + + w->status = GENX_SUCCESS; + w->alloc = alloc; + w->dealloc = dealloc; + w->userData = userData; + w->sequence = SEQUENCE_NO_DOC; + + if (initPlist(w, &w->namespaces) != GENX_SUCCESS || + initPlist(w, &w->elements) != GENX_SUCCESS || + initPlist(w, &w->attributes) != GENX_SUCCESS || + initPlist(w, &w->prefixes) != GENX_SUCCESS || + initPlist(w, &w->stack) != GENX_SUCCESS) + return NULL; + + if ((w->status = initCollector(w, &w->arec.value)) != GENX_SUCCESS) + return NULL; + + if ((w->empty = copy(w, (utf8) "")) == NULL) + { + w->status = GENX_ALLOC_FAILED; + return NULL; + } + + w->xmlnsEquals = declareAttribute(w, NULL, (utf8) "xmlns", NULL, &w->status); + if (w->xmlnsEquals == NULL || w->status != GENX_SUCCESS) + return NULL; + w->defaultNsDeclared = False; + + w->nextPrefix = 1; + + genxSetCharProps(w->xmlChars); + + w->etext[GENX_SUCCESS] = "success"; + w->etext[GENX_BAD_UTF8] = "invalid UTF-8"; + w->etext[GENX_NON_XML_CHARACTER] = "non-XML character"; + w->etext[GENX_BAD_NAME] = "invalid name"; + w->etext[GENX_ALLOC_FAILED] = "memory allocation failed"; + w->etext[GENX_BAD_NAMESPACE_NAME] = "invalid namespace name"; + w->etext[GENX_INTERNAL_ERROR] = "internal error"; + w->etext[GENX_DUPLICATE_PREFIX] = "duplicate prefix"; + w->etext[GENX_SEQUENCE_ERROR] = "call out of sequence"; + w->etext[GENX_NO_START_TAG] = "no start tag for end element call"; + w->etext[GENX_IO_ERROR] = "io error"; + w->etext[GENX_MISSING_VALUE] = "missing attribute value"; + w->etext[GENX_MALFORMED_COMMENT] = "malformed 
comment body"; + w->etext[GENX_MALFORMED_PI] = "?> in PI"; + w->etext[GENX_XML_PI_TARGET] = "target of PI matches [xX][mM][lL]"; + w->etext[GENX_DUPLICATE_ATTRIBUTE] = "duplicate attribute"; + w->etext[GENX_ATTRIBUTE_IN_DEFAULT_NAMESPACE] = + "attribute is default namespace"; + w->etext[GENX_DUPLICATE_NAMESPACE] = + "namespace declared twice with different prefixes"; + w->etext[GENX_BAD_DEFAULT_DECLARATION] = + "default namespace declared on an element which is not in a namespace"; + + /* the xml: namespace is pre-wired */ + xml = genxDeclareNamespace(w, (utf8) "http://www.w3.org/XML/1998/namespace", + (utf8) "xml", &w->status); + if (xml == NULL) + return NULL; + xml->declCount = 1; + xml->declaration = xml->defaultDecl; + + w->ppIndent = 0; /* Pretty-printing is disabled by default. */ + w->canonical = False; /* No canonicalization by default. */ + + w->firstAttribute = NULL; + w->lastAttribute = NULL; + return w; +} + +genxStatus genxReset (genxWriter w) +{ + size_t i; + + /* Clean up the stack. */ + w->stack.count = 0; + + /* Reset namespace declaration counts. The first entry is the pre-wired + xml namespace. */ + ((genxNamespace) w->namespaces.pointers[0])->declCount = 1; + + for (i = 1; i < w->namespaces.count; i++) + { + ((genxNamespace) w->namespaces.pointers[i])->declCount = 0; + ((genxNamespace) w->namespaces.pointers[i])->baroque = False; + } + + /* Clear provided attributes. */ + for (i = 0; i < w->attributes.count; i++) + ((genxAttribute) w->attributes.pointers[i])->provided = False; + + /* Clear attribute list. 
*/ + if (!w->canonical) + { + while (w->firstAttribute != NULL) + { + genxAttribute t = w->firstAttribute->next; + w->firstAttribute->next = NULL; + w->firstAttribute = t; + } + + w->lastAttribute = NULL; + } + + w->status = GENX_SUCCESS; + w->sequence = SEQUENCE_NO_DOC; + + return w->status; +} + + +/* + * get/set userData + */ +void genxSetUserData(genxWriter w, void * userData) +{ + w->userData = userData; +} +void * genxGetUserData(genxWriter w) +{ + return w->userData; +} + +/* + * get/set pretty-printing + */ +genxStatus genxSetPrettyPrint(genxWriter w, int ind) +{ + if (w->sequence == SEQUENCE_NO_DOC) + w->ppIndent = ind; + else + w->status = GENX_SEQUENCE_ERROR; + + return w->status; +} + +int genxGetPrettyPrint(genxWriter w) +{ + return w->ppIndent; +} + +/* + * get/set canonicalization. + */ +genxStatus genxSetCanonical(genxWriter w, int flag) +{ + if (w->sequence == SEQUENCE_NO_DOC) + w->canonical = flag; + else + w->status = GENX_SEQUENCE_ERROR; + + return w->status; +} + +int genxGetCanonical(genxWriter w) +{ + return w->canonical; +} + +/* + * get/set allocator + */ +void genxSetAlloc(genxWriter w, genxAlloc alloc) +{ + w->alloc = alloc; +} + +void genxSetDealloc(genxWriter w, genxDealloc dealloc) +{ + w->dealloc = dealloc; +} + +genxAlloc genxGetAlloc(genxWriter w) +{ + return w->alloc; +} + +genxDealloc genxGetDealloc(genxWriter w) +{ + return w->dealloc; +} + +/* + * Clean up + */ +void genxDispose(genxWriter w) +{ + size_t i; + genxNamespace * nn = (genxNamespace *) w->namespaces.pointers; + genxElement * ee = (genxElement *) w->elements.pointers; + genxAttribute * aa = (genxAttribute *) w->attributes.pointers; + utf8 * pp = (utf8 *) w->prefixes.pointers; + + for (i = 0; i < w->namespaces.count; i++) + { + deallocate(w, nn[i]->name); + deallocate(w, nn[i]); + } + + for (i = 0; i < w->elements.count; i++) + { + deallocate(w, ee[i]->type); + deallocate(w, ee[i]); + } + + for (i = 0; i < w->attributes.count; i++) + { + deallocate(w, aa[i]->name); + 
deallocate(w, aa[i]->value.buf); + deallocate(w, aa[i]); + } + + for(i = 0; i < w->prefixes.count; i++) + deallocate(w, pp[i]); + + deallocate(w, w->namespaces.pointers); + deallocate(w, w->elements.pointers); + deallocate(w, w->attributes.pointers); + deallocate(w, w->prefixes.pointers); + deallocate(w, w->stack.pointers); + + deallocate(w, w->arec.value.buf); + + deallocate(w, w->empty); + + /* how Oscar dealt with Igli */ + deallocate(w, w); +} + +/******************************* + * External utility routines + */ + +/* + * scan a buffer and report problems with UTF-8 encoding or non-XML characters + */ +genxStatus genxCheckText(genxWriter w, constUtf8 s) +{ + while (*s) + { + int c = genxNextUnicodeChar(&s); + if (c == -1) + return GENX_BAD_UTF8; + + if (!isXMLChar(w, c)) + return GENX_NON_XML_CHARACTER; + } + return GENX_SUCCESS; +} + +/* + * Purify some text + */ +int genxScrubText(genxWriter w, constUtf8 in, utf8 out) +{ + int problems = 0; + constUtf8 last = in; + + while (*in) + { + int c = genxNextUnicodeChar(&in); + if (c == -1) + { + problems++; + last = in; + continue; + } + + if (!isXMLChar(w, c)) + { + problems++; + last = in; + continue; + } + + while (last < in) + *out++ = *last++; + } + *out = 0; + return problems; +} + +/* + * check one character + */ +int genxCharClass(genxWriter w, int c) +{ + int ret = 0; + + if (isXMLChar(w, c)) + ret |= GENX_XML_CHAR; + if (isNameChar(w, c)) + ret |= GENX_NAMECHAR; + if (isLetter(w, c)) + ret |= GENX_LETTER; + return ret; +} + +static genxStatus checkNCName(genxWriter w, constUtf8 name) +{ + int c; + + if (name == NULL || *name == 0) + return GENX_BAD_NAME; + + c = genxNextUnicodeChar(&name); + if (!isLetter(w, c) && c != ':' && c != '_') + return GENX_BAD_NAME; + + while (*name) + { + c = genxNextUnicodeChar(&name); + if (c == -1) + return GENX_BAD_UTF8; + if (!isNameChar(w, c)) + return GENX_BAD_NAME; + } + return GENX_SUCCESS; +} + +char * genxGetErrorMessage(genxWriter w, genxStatus status) +{ + return 
w->etext[status]; +} +char * genxLastErrorMessage(genxWriter w) +{ + return w->etext[w->status]; +} + +/******************************* + * Declarations: namespace/element/attribute + */ + +/* + * DeclareNamespace - by far the most complex routine in Genx + */ +genxNamespace genxDeclareNamespace(genxWriter w, constUtf8 uri, + constUtf8 defaultPref, + genxStatus * statusP) +{ + genxNamespace ns; + genxAttribute defaultDecl; + unsigned char newPrefix[100]; + + if (uri == NULL || uri[0] == 0) + { + w->status = GENX_BAD_NAMESPACE_NAME; + goto busted; + } + + if ((w->status = genxCheckText(w, uri)) != GENX_SUCCESS) + goto busted; + + /* if a prefix is provided, it has to be an NCname */ + if (defaultPref != NULL && defaultPref[0] != 0 && + (w->status = checkNCName(w, defaultPref)) != GENX_SUCCESS) + goto busted; + + /* previously declared? */ + if ((ns = findNamespace(&w->namespaces, uri))) + { + /* just a lookup, really */ + if ((defaultPref == NULL) || + (defaultPref[0] == 0 && ns->defaultDecl == w->xmlnsEquals) || + (strcmp((const char *) ns->defaultDecl->name + STRLEN_XMLNS_COLON, + (const char *) defaultPref) == 0)) + { + w->status = *statusP = GENX_SUCCESS; + return ns; + } + } + + /* wasn't already declared */ + else + { + + /* make a default prefix if none provided */ + if (defaultPref == NULL) + { + sprintf((char *) newPrefix, "g%d", w->nextPrefix++); + defaultPref = newPrefix; + } + + ns = (genxNamespace) allocate(w, sizeof(struct genxNamespace_rec)); + if (ns == NULL) + { + w->status = GENX_ALLOC_FAILED; + goto busted; + } + ns->writer = w; + ns->baroque = False; + + if ((ns->name = copy(w, uri)) == NULL) + { + w->status = GENX_ALLOC_FAILED; + goto busted; + } + + if ((w->status = listAppend(&w->namespaces, ns)) != GENX_SUCCESS) + goto busted; + ns->defaultDecl = ns->declaration = NULL; + ns->declCount = 0; + } + + if (defaultPref[0] == 0) + { + if (w->defaultNsDeclared) + { + w->status = GENX_DUPLICATE_PREFIX; + goto busted; + } + defaultDecl = 
w->xmlnsEquals; + w->defaultNsDeclared = True; + } + else + { + /* this catches dupes too */ + if ((defaultPref = storePrefix(w, defaultPref, False)) == NULL) + goto busted; + + defaultDecl = declareAttribute(w, NULL, defaultPref, ns->name, statusP); + if (defaultDecl == NULL || *statusP != GENX_SUCCESS) + { + w->status = *statusP; + return NULL; + } + } + + if (ns->defaultDecl != NULL && defaultDecl != ns->defaultDecl) + ns->baroque = True; + ns->defaultDecl = defaultDecl; + + *statusP = GENX_SUCCESS; + return ns; + +busted: + *statusP = w->status; + return NULL; +} + +/* + * get namespace prefix + */ +utf8 genxGetNamespacePrefix(genxNamespace ns) +{ + if (ns->declaration == NULL) + return NULL; + + if (ns->declaration == ns->writer->xmlnsEquals) + return ns->writer->empty; + + return ns->declaration->name + STRLEN_XMLNS_COLON; +} + +/* + * DeclareElement - see genx.h for details + */ +genxElement genxDeclareElement(genxWriter w, + genxNamespace ns, constUtf8 type, + genxStatus * statusP) +{ + genxElement old; + genxElement el; + + if ((w->status = checkNCName(w, type)) != GENX_SUCCESS) + { + *statusP = w->status; + return NULL; + } + + /* already declared? */ + old = findElement(&w->elements, (ns == NULL) ? 
NULL : ns->name, type); + if (old) + return old; + + if ((el = (genxElement) allocate(w, sizeof(struct genxElement_rec))) == NULL) + { + w->status = *statusP = GENX_ALLOC_FAILED; + return NULL; + } + + el->writer = w; + el->ns = ns; + if ((el->type = copy(w, type)) == NULL) + { + w->status = *statusP = GENX_ALLOC_FAILED; + return NULL; + } + + if ((w->status = listAppend(&w->elements, el)) != GENX_SUCCESS) + { + *statusP = w->status; + return NULL; + } + + *statusP = GENX_SUCCESS; + return el; +} + +/* + * C14n ordering for attributes: + * - first, namespace declarations by the prefix being declared + * - second, unprefixed attributes by attr name + * - third, prefixed attrs by ns uri then local part + */ +static int orderAttributes(genxAttribute a1, genxAttribute a2) +{ + if (a1->atype == a2->atype) + { + if (a1->atype == ATTR_PREFIXED && a1->ns != a2->ns) + return strcmp((const char *) a1->ns->name, (const char *) a2->ns->name); + else + return strcmp((const char *) a1->name, (const char *) a2->name); + } + + else if (a1->atype == ATTR_NSDECL) + return -1; + + else if (a1->atype == ATTR_NAKED) + { + if (a2->atype == ATTR_NSDECL) + return 1; + else + return -1; + } + + else + return 1; +} + +/* + * internal declare-attribute. 
This one allows colonized values for + * names, so that you can declare xmlns:-type attributes + */ +static genxAttribute declareAttribute(genxWriter w, genxNamespace ns, + constUtf8 name, constUtf8 valuestr, + genxStatus * statusP) +{ + int high, low; + genxAttribute * aa = (genxAttribute *) w->attributes.pointers; + genxAttribute a; + + w->arec.ns = ns; + w->arec.name = (utf8) name; + + if (ns) + w->arec.atype = ATTR_PREFIXED; + else if (strncmp((const char *) name, "xmlns", STRLEN_XMLNS_COLON - 1) == 0) + w->arec.atype = ATTR_NSDECL; + else + w->arec.atype = ATTR_NAKED; + + if (ns && (ns->defaultDecl == w->xmlnsEquals)) + { + w->status = GENX_ATTRIBUTE_IN_DEFAULT_NAMESPACE; + goto busted; + } + + /* attribute list has to be kept sorted per c14n rules */ + high = (int) w->attributes.count; + low = -1; + while (high - low > 1) + { + int probe = (high + low) / 2; + if (orderAttributes(&w->arec, aa[probe]) < 0) + high = probe; + else + low = probe; + } + + /* if it was already there */ + if (low != -1 && orderAttributes(&w->arec, aa[low]) == 0) + return aa[low]; + + /* not there, build it */ + a = (genxAttribute) allocate(w, sizeof(struct genxAttribute_rec)); + if (a == NULL) + { + w->status = GENX_ALLOC_FAILED; + goto busted; + } + + a->writer = w; + a->ns = ns; + a->provided = False; + a->atype = w->arec.atype; + a->next = NULL; + + if ((a->name = copy(w, name)) == NULL) + { + w->status = GENX_ALLOC_FAILED; + goto busted; + } + + if ((w->status = initCollector(w, &a->value)) != GENX_SUCCESS) + goto busted; + + if (valuestr) + if ((w->status = collectString(w, &a->value, valuestr)) != GENX_SUCCESS) + goto busted; + + w->status = listInsert(&w->attributes, a, (size_t) high); + if (w->status != GENX_SUCCESS) + goto busted; + + *statusP = GENX_SUCCESS; + return a; + +busted: + *statusP = w->status; + return NULL; +} + +/* + * genxDeclareAttribute - see genx.h for details + */ +genxAttribute genxDeclareAttribute(genxWriter w, + genxNamespace ns, constUtf8 name, + 
genxStatus * statusP) +{ + if ((w->status = checkNCName(w, name)) != GENX_SUCCESS) + { + *statusP = w->status; + return NULL; + } + + return declareAttribute(w, ns, name, NULL, statusP); +} + +/******************************* + * I/O + */ +static genxStatus sendx(genxWriter w, constUtf8 s) +{ + if (w->sender) + return (*w->sender->send)(w->userData, s); + else + return GENX_IO_ERROR; +} + +static genxStatus sendxBounded(genxWriter w, constUtf8 start, constUtf8 end) +{ + if (w->sender) + return (*w->sender->sendBounded)(w->userData, start, end); + else + return GENX_IO_ERROR; +} + +#define SendCheck(w,s) if ((w->status=sendx(w,(constUtf8)s))!=GENX_SUCCESS) return w->status + +/******************************* + * XML writing routines. The semantics of the externally-facing ones are + * written up in genx.h. Commentary here is implementation notes and + * for internal routines. + */ + +genxStatus genxStartDocSender(genxWriter w, genxSender * sender) +{ + if (w->sequence != SEQUENCE_NO_DOC) + return w->status = GENX_SEQUENCE_ERROR; + + w->sequence = SEQUENCE_PRE_DOC; + w->sender = sender; + + if (w->ppIndent) + { + w->ppSimple = True; + w->ppDepth = 0; + } + + return GENX_SUCCESS; +} + +/* + * Output new line and indentation. + */ +static genxStatus writeIndentation(genxWriter w) +{ + int i, n; + SendCheck(w, "\n"); + n = w->ppDepth * w->ppIndent; + + for (i = 0; i < n; i++) + SendCheck(w, " "); + + return w->status; +} + +/* + * Output attribute. 
+ */ +static genxStatus writeAttribute(genxAttribute a) +{ + genxWriter w = a->writer; + + if (a->ns && a->ns->baroque && a->ns->declaration == w->xmlnsEquals) + return w->status = GENX_ATTRIBUTE_IN_DEFAULT_NAMESPACE; + + SendCheck(w, " "); + + if (a->ns) + { + SendCheck(w, a->ns->declaration->name + STRLEN_XMLNS_COLON); + SendCheck(w, ":"); + } + + SendCheck(w, a->name); + SendCheck(w, "=\""); + SendCheck(w, a->value.buf); + SendCheck(w, "\""); + + return w->status; +} + +/* + * Write out the attributes we've been gathering up for an element. We save + * them until we've gathered them all so they can be writen in canonical + * order. + * Also, we end the start-tag. + * The trick here is that we keep the attribute list properly sorted as + * we build it, then as each attribute is added, we fill in its value and + * mark the fact that it's been added, in the "provided" field. + */ +static genxStatus writeStartTag(genxWriter w, Boolean close) +{ + size_t i; + genxAttribute * aa = (genxAttribute *) w->attributes.pointers; + genxElement e = w->nowStarting; + + /* + * make sure the right namespace decls are in effect; + * if they are these might create an error, so ignore it + */ + if (e->ns) + addNamespace(e->ns, NULL); + else + unsetDefaultNamespace(w); + w->status = GENX_SUCCESS; + + if (w->ppIndent) + { + if (w->ppDepth) + if (writeIndentation (w) != GENX_SUCCESS) + return w->status; + + if (!close) + { + w->ppDepth++; + w->ppSimple = True; + } + } + + SendCheck(w, "<"); + if (e->ns && (e->ns->declaration != w->xmlnsEquals)) + { + SendCheck(w, e->ns->declaration->name + STRLEN_XMLNS_COLON); + SendCheck(w, ":"); + } + SendCheck(w, e->type); + + /* If we are canonicalizing, then write sorted attributes. Otherwise + write them in the order specified. 
*/ + if (w->canonical) + { + for (i = 0; i < w->attributes.count; i++) + { + if (aa[i]->provided) + { + if (writeAttribute (aa[i]) != GENX_SUCCESS) + return w->status; + + aa[i]->provided = False; + } + } + } + else + { + /* Keep the chain consistent even if we bail out mid way because of + an error. This way we will still be able to clear it in reset().*/ + while (w->firstAttribute != NULL) + { + genxAttribute t = w->firstAttribute->next; + + if (writeAttribute (w->firstAttribute) != GENX_SUCCESS) + return w->status; + + w->firstAttribute->provided = False; + w->firstAttribute->next = NULL; + w->firstAttribute = t; + } + + w->lastAttribute = NULL; + } + + if (close) + SendCheck(w, "/"); + SendCheck(w, ">"); + return GENX_SUCCESS; +} + +/* + * internal clear-er; no sequence checking + */ +static genxStatus unsetDefaultNamespace(genxWriter w) +{ + int i; + Boolean found = False; + + /* don't put it in if not needed */ + i = (int) (w->stack.count) - 1; + while (found == False && i > 0) + { + while (w->stack.pointers[i] != NULL) + { + genxAttribute decl = (genxAttribute) w->stack.pointers[i--]; + genxNamespace ns = (genxNamespace) w->stack.pointers[i--]; + + /* if already unset */ + if (ns == NULL) + return w->status = GENX_SUCCESS; + + /* + * the default namespace was declared. 
This namespace now + * becomes baroque + */ + if (decl == w->xmlnsEquals) + { + ns->baroque = True; + found = True; + break; + } + } + i -= 2; + } + + if (!found) + return GENX_SUCCESS; + + /* + * push a signal on the stack + */ + if ((w->status = listAppend(&w->stack, NULL)) != GENX_SUCCESS) + return w->status; + w->status = listAppend(&w->stack, w->xmlnsEquals); + if (w->status != GENX_SUCCESS) + return w->status; + + /* add the xmlns= attribute, it must be the first one */ + return addAttribute(w->xmlnsEquals, w->empty); +} + +/* + * clear the default namespace declaration + */ +genxStatus genxUnsetDefaultNamespace(genxWriter w) +{ + + /* can only do this while in start-tag mode */ + if (w->sequence != SEQUENCE_START_TAG) + return w->status = GENX_SEQUENCE_ERROR; + + return unsetDefaultNamespace(w); +} + +genxStatus genxStartElement(genxElement e) +{ + genxWriter w = e->writer; + + switch (w->sequence) + { + case SEQUENCE_NO_DOC: + case SEQUENCE_POST_DOC: + case SEQUENCE_START_ATTR: + return w->status = GENX_SEQUENCE_ERROR; + case SEQUENCE_START_TAG: + case SEQUENCE_ATTRIBUTES: + if ((w->status = writeStartTag(w, False)) != GENX_SUCCESS) + return w->status; + break; + case SEQUENCE_PRE_DOC: + case SEQUENCE_CONTENT: + break; + } + + w->sequence = SEQUENCE_START_TAG; + + /* + * push the stack. We push a NULL after a pointer to this element + * because the stack will also contain pointers to the namespace + * attributes that got declared here, so we can keep track of what's + * in effect. I.e. 
a single stack entry consists logically of a pointer + * to an element object, a NULL, then zero or more pairs of pointers to + * namespace objects/declarations + */ + if ((w->status = listAppend(&w->stack, e)) != GENX_SUCCESS) + return w->status; + if ((w->status = listAppend(&w->stack, NULL)) != GENX_SUCCESS) + return w->status; + + w->nowStarting = e; + + return GENX_SUCCESS; +} + +/* + * internal namespace adder; no sequence checking + */ +static genxStatus addNamespace(genxNamespace ns, constUtf8 prefix) +{ + genxWriter w = ns->writer; + genxAttribute decl; + int i; + genxElement e; + + /* + * first, we'll find the declaring attribute + */ + if (prefix == NULL) + decl = ns->defaultDecl; + else + { + if (prefix[0] == 0) + decl = w->xmlnsEquals; + else + { + if ((prefix = storePrefix(w, prefix, True)) == NULL) + return w->status; + decl = declareAttribute(w, NULL, prefix, ns->name, &w->status); + if (decl == NULL || w->status != GENX_SUCCESS) + return w->status; + } + } + + if (decl != ns->defaultDecl) + ns->baroque = True; + + /* + * avoid doing anything if this namespace is already declared. If + * they've shown good taste we can do this cheaply + */ + if (!ns->baroque) + { + if (ns->declCount > 0) + return w->status = GENX_SUCCESS; + } + else + { + + /* + * First, we'll run all the way up the stack to see if there is + * another declaration for this namespace/prefix in scope, in which + * case it's a no-op; or, if there's another declaration for this + * prefix on another namespace, in which case we have to over-ride + */ + i = (int) (w->stack.count) - 1; + while (i > 0) + { + while (w->stack.pointers[i] != NULL) + { + genxAttribute otherDecl = (genxAttribute) w->stack.pointers[i--]; + genxNamespace otherNs = (genxNamespace) w->stack.pointers[i--]; + + if (ns == otherNs) + { + if (decl == otherDecl) + return w->status = GENX_SUCCESS; + else + { + i = 0; + break; + } + } + else + { + /* different namespace, same prefix? 
*/ + if (decl == otherDecl) + { + i = 0; + break; + } + } + } + i -= 2; + } + } + + /* + * If this namespace is already declared on + * this element (with different prefix/decl) which is an error. + */ + i = (int) (w->stack.count) - 1; + while (w->stack.pointers[i] != NULL) + { + genxNamespace otherNs; + i--; /* don't need declaration */ + otherNs = (genxNamespace) w->stack.pointers[i--]; + + if (ns == otherNs) + return w->status = GENX_DUPLICATE_NAMESPACE; + } + + /* move pointer from NULL to element */ + --i; + + /* + * It's also an error if this is a default-namespace declaration and the + * element is in no namespace. + */ + e = (genxElement) w->stack.pointers[i]; + if (e->ns == NULL && decl == w->xmlnsEquals) + return w->status = GENX_BAD_DEFAULT_DECLARATION; + + if ((w->status = listAppend(&w->stack, ns)) != GENX_SUCCESS) + return w->status; + if ((w->status = listAppend(&w->stack, decl)) != GENX_SUCCESS) + return w->status; + + ns->declaration = decl; + ns->declCount++; + return addAttribute(decl, ns->name); +} + +/* + * Add a namespace declaration + */ +genxStatus genxAddNamespace(genxNamespace ns, constUtf8 prefix) +{ + if (ns->writer->sequence != SEQUENCE_START_TAG) + return ns->writer->status = GENX_SEQUENCE_ERROR; + + return addNamespace(ns, prefix); +} + +/* + * Private attribute-adding code + * most of the work here is normalizing the value, which is the same + * as regular normalization except for " is replaced by """ + */ +static genxStatus collectAttributeValue (genxWriter w, collector* value, + constUtf8 start, constUtf8 end) +{ + /* If end is NULL then the length of the value is unknown and + the value is 0-terminated. */ + + utf8 last = (utf8) start; + + while (end != NULL ? 
start < end : *start) + { + int c = genxNextUnicodeChar(&start); + + if (c == -1) + return w->status = GENX_BAD_UTF8; + + if (!isXMLChar(w, c)) + return w->status = GENX_NON_XML_CHARACTER; + + switch(c) + { + case 9: + collectPiece(w, value, " ", 5); + break; + case 0xa: + collectPiece(w, value, " ", 5); + break; + case 0xd: + collectPiece(w, value, " ", 5); + break; + case '"': + collectPiece(w, value, """, 6); + break; + case '<': + collectPiece(w, value, "<", 4); + break; + case '&': + collectPiece(w, value, "&", 5); + break; + /* + case '>': + collectPiece(w, value, ">", 4); + break; + */ + default: + collectPiece(w, value, (const char *) last, start - last); + break; + } + last = (utf8) start; + } + + return GENX_SUCCESS; +} + +static genxStatus addAttribute(genxAttribute a, constUtf8 valuestr) +{ + genxWriter w = a->writer; + + /* if valuestr not provided, this is an xmlns with a pre-cooked value */ + if (valuestr) + { + startCollect(&a->value); + + if (collectAttributeValue (w, &a->value, valuestr, NULL) != GENX_SUCCESS) + return w->status; + + endCollect(&a->value); + } + + /* now add the namespace attribute; might fail if it's been hand-declared */ + if (a->ns) + addNamespace(a->ns, NULL); + + if (valuestr && a->provided) + return w->status = GENX_DUPLICATE_ATTRIBUTE; + + a->provided = True; + + /* Add the attribute to the ordered list if not canonical. */ + if (!w->canonical) + { + if (w->lastAttribute != NULL) + w->lastAttribute = w->lastAttribute->next = a; + else + w->lastAttribute = w->firstAttribute = a; + } + + return GENX_SUCCESS; +} + +/* + * public attribute adder. 
+ * The only difference is that it doesn't allow a NULL value + */ +genxStatus genxAddAttribute(genxAttribute a, constUtf8 valuestr) +{ + if (a->writer->sequence != SEQUENCE_START_TAG && + a->writer->sequence != SEQUENCE_ATTRIBUTES) + return a->writer->status = GENX_SEQUENCE_ERROR; + a->writer->sequence = SEQUENCE_ATTRIBUTES; + + if (valuestr == NULL) + return a->writer->status = GENX_MISSING_VALUE; + + return addAttribute(a, valuestr); +} + +genxStatus genxStartAttribute(genxAttribute a) +{ + genxWriter w = a->writer; + + if (w->sequence != SEQUENCE_START_TAG && + w->sequence != SEQUENCE_ATTRIBUTES) + return w->status = GENX_SEQUENCE_ERROR; + + w->sequence = SEQUENCE_START_ATTR; + w->nowStartingAttr = a; + + startCollect(&a->value); + + return GENX_SUCCESS; +} + +genxStatus genxEndAttribute(genxWriter w) +{ + genxAttribute a; + + if (w->sequence != SEQUENCE_START_ATTR) + return w->status = GENX_SEQUENCE_ERROR; + + a = w->nowStartingAttr; + w->sequence = SEQUENCE_ATTRIBUTES; + + endCollect(&a->value); + + /* now add the namespace attribute; might fail if it's been hand-declared */ + if (a->ns) + addNamespace(a->ns, NULL); + + if (a->provided) + return w->status = GENX_DUPLICATE_ATTRIBUTE; + + a->provided = True; + + /* Add the attribute to the ordered list if not canonical. */ + if (!w->canonical) + { + if (w->lastAttribute != NULL) + w->lastAttribute = w->lastAttribute->next = a; + else + w->lastAttribute = w->firstAttribute = a; + } + + return GENX_SUCCESS; +} + +genxStatus genxEndElement(genxWriter w) +{ + int i; + Boolean close = True; + + switch (w->sequence) + { + case SEQUENCE_NO_DOC: + case SEQUENCE_PRE_DOC: + case SEQUENCE_POST_DOC: + case SEQUENCE_START_ATTR: + return w->status = GENX_SEQUENCE_ERROR; + case SEQUENCE_START_TAG: + case SEQUENCE_ATTRIBUTES: + if ((w->status = writeStartTag(w, !w->canonical)) != GENX_SUCCESS) + return w->status; + close = w->canonical; + break; + case SEQUENCE_CONTENT: + break; + } + + /* + * Output the closing tag. 
+ */ + if (close) + { + genxElement e; + + /* + * first peek into the stack to find the right namespace declaration + * (if any) so we can properly prefix the end-tag. Have to do this + * before unwinding the stack because that might reset some xmlns + * prefixes to the context in the parent element + */ + for (i = (int) (w->stack.count) - 1; + w->stack.pointers[i] != NULL; + i -= 2) + ; + e = (genxElement) w->stack.pointers[--i]; + + if (w->ppIndent) + { + w->ppDepth--; + + if (!w->ppSimple) + if (writeIndentation (w) != GENX_SUCCESS) + return w->status; + } + + SendCheck(w, "ns && e->ns->declaration != w->xmlnsEquals) + { + SendCheck(w, e->ns->declaration->name + STRLEN_XMLNS_COLON); + SendCheck(w, ":"); + } + SendCheck(w, e->type); + SendCheck(w, ">"); + } + + if (w->ppIndent) + w->ppSimple = False; + + /* + * pop zero or more namespace declarations, then a null, then the + * start-element declaration off the stack + */ + w->stack.count--; + while (w->stack.pointers[w->stack.count] != NULL) + { + genxNamespace ns = (genxNamespace) w->stack.pointers[--w->stack.count]; + w->stack.count--; /* don't need decl */ + + /* if not a fake unset-default namespace */ + if (ns) + { + /* + * if they've stupidly jammed in their own namespace-prefix + * declarations, we have to go looking to see if there's another + * one in effect + */ + if (ns->baroque) + { + i = (int) w->stack.count; + while (i > 0) + { + while (w->stack.pointers[i] != NULL) + { + genxAttribute otherDecl = (genxAttribute) w->stack.pointers[i--]; + genxNamespace otherNs = (genxNamespace) w->stack.pointers[i--]; + + if (otherNs == ns) + { + ns->declaration = otherDecl; + i = 0; + break; + } + } + + /* skip NULL & element */ + i -= 2; + } + } + ns->declCount--; + if (ns->declCount == 0) + ns->baroque = False; + } + } + + /* pop the NULL */ + if (w->stack.count == 0) + return w->status = GENX_NO_START_TAG; + --w->stack.count; + + if (w->stack.count == 0) + w->sequence = SEQUENCE_POST_DOC; + else + w->sequence = 
SEQUENCE_CONTENT; + + return GENX_SUCCESS; +} + +/* + * Internal character-adder. It tries to keep the number of sendx() + * calls down by looking at each character but only doing the output + * when it has to escape something; ordinary text gets saved up in + * chunks the start of which is indicated by *breaker. + * c is the character, next points to the UTF8 representing the next + * lastsP indirectly points to the UTF8 representing the + * character, breakerP* indirectly points to the last place genx + * changed the UTF8, e.g. by escaping a '<' + */ +static genxStatus addChar(genxWriter w, int c, constUtf8 next, + constUtf8 * lastsP, constUtf8 * breakerP) +{ + if (c == -1) + return GENX_BAD_UTF8; + + if (!isXMLChar(w, c)) + return GENX_NON_XML_CHARACTER; + + switch(c) + { + case 0xd: + if ((w->status = sendxBounded(w, *breakerP, *lastsP)) != GENX_SUCCESS) + return w->status; + *breakerP = next; + sendx(w, (utf8) " "); + break; + case '<': + if ((w->status = sendxBounded(w, *breakerP, *lastsP)) != GENX_SUCCESS) + return w->status; + *breakerP = next; + sendx(w, (utf8) "<"); + break; + case '&': + if ((w->status = sendxBounded(w, *breakerP, *lastsP)) != GENX_SUCCESS) + return w->status; + *breakerP = next; + sendx(w, (utf8) "&"); + break; + case '>': + if ((w->status = sendxBounded(w, *breakerP, *lastsP)) != GENX_SUCCESS) + return w->status; + *breakerP = next; + sendx(w, (utf8) ">"); + break; + default: + break; + } + *lastsP = next; + return GENX_SUCCESS; +} + +genxStatus genxAddText(genxWriter w, constUtf8 start) +{ + constUtf8 lasts = start; + constUtf8 breaker = start; + + if (w->sequence == SEQUENCE_START_TAG || + w->sequence == SEQUENCE_ATTRIBUTES) + { + if ((w->status = writeStartTag(w, False)) != GENX_SUCCESS) + return w->status; + w->sequence = SEQUENCE_CONTENT; + } + + if (w->sequence == SEQUENCE_CONTENT) + { + while (*start) + { + int c = genxNextUnicodeChar(&start); + + w->status = addChar(w, c, start, &lasts, &breaker); + if (w->status != 
GENX_SUCCESS) + return w->status; + } + return sendxBounded(w, breaker, (utf8) start); + } + else if (w->sequence == SEQUENCE_START_ATTR) + { + return collectAttributeValue (w, &w->nowStartingAttr->value, start, NULL); + } + else + return w->status = GENX_SEQUENCE_ERROR; +} + +genxStatus genxAddBoundedText(genxWriter w, constUtf8 start, constUtf8 end) +{ + constUtf8 lasts = start; + constUtf8 breaker = start; + + if (w->sequence == SEQUENCE_START_TAG || + w->sequence == SEQUENCE_ATTRIBUTES) + { + if ((w->status = writeStartTag(w, False)) != GENX_SUCCESS) + return w->status; + w->sequence = SEQUENCE_CONTENT; + } + + if (w->sequence == SEQUENCE_CONTENT) + { + while (start < end) + { + int c = genxNextUnicodeChar(&start); + + w->status = addChar(w, c, (utf8) start, &lasts, &breaker); + if (w->status != GENX_SUCCESS) + return w->status; + } + return sendxBounded(w, breaker, (utf8) start); + } + else if (w->sequence == SEQUENCE_START_ATTR) + { + return collectAttributeValue (w, &w->nowStartingAttr->value, start, end); + } + else + return w->status = GENX_SEQUENCE_ERROR; +} + +genxStatus genxAddCountedText(genxWriter w, constUtf8 start, size_t byteCount) +{ + utf8 end = (utf8) (start + byteCount); + + return genxAddBoundedText(w, start, end); +} + +genxStatus genxAddCharacter(genxWriter w, int c) +{ + unsigned char cUTF8[10]; + utf8 lasts, breaker, next; + + if (w->sequence == SEQUENCE_START_TAG || + w->sequence == SEQUENCE_ATTRIBUTES) + { + if ((w->status = writeStartTag(w, False)) != GENX_SUCCESS) + return w->status; + w->sequence = SEQUENCE_CONTENT; + } + + if (!isXMLChar(w, c)) + return w->status = GENX_NON_XML_CHARACTER; + + if (w->sequence == SEQUENCE_START_ATTR) + { + int done = 1; + collector* value = &w->nowStartingAttr->value; + + switch(c) + { + case 9: + collectPiece(w, value, " ", 5); + break; + case 0xa: + collectPiece(w, value, " ", 5); + break; + case 0xd: + collectPiece(w, value, " ", 5); + break; + case '"': + collectPiece(w, value, """, 6); + break; + 
case '<': + collectPiece(w, value, "<", 4); + break; + case '&': + collectPiece(w, value, "&", 5); + break; + /* + case '>': + collectPiece(w, value, ">", 4); + break; + */ + default: + done = 0; + break; + } + + if (done) + return GENX_SUCCESS; + } + + /* make UTF8 representation of character */ + lasts = breaker = next = cUTF8; + + if (c < 0x80) + *next++ = c; + else if (c < 0x800) + { + *next++ = 0xc0 | (c >> 6); + *next++ = 0x80 | (c & 0x3f); + } + else if (c < 0x10000) + { + *next++ = 0xe0 | (c >> 12); + *next++ = 0x80 | ((c & 0xfc0) >> 6); + *next++ = 0x80 | (c & 0x3f); + } + else + { + *next++ = 0xf0 | (c >> 18); + *next++ = 0x80 | ((c & 0x3f000) >> 12); + *next++ = 0x80 | ((c & 0xfc0) >> 6); + *next++ = 0x80 | (c & 0x3f); + } + *next = 0; + + if (w->sequence == SEQUENCE_CONTENT) + { + w->status = + addChar(w, c, next, (constUtf8 *) &lasts, (constUtf8 *) &breaker); + + if (w->status != GENX_SUCCESS) + return w->status; + + return sendxBounded(w, breaker, next); + } + else if (w->sequence == SEQUENCE_START_ATTR) + { + collectPiece(w, &w->nowStartingAttr->value, + (const char *) cUTF8, next - cUTF8); + return GENX_SUCCESS; + } + else + return w->status = GENX_SEQUENCE_ERROR; +} + +genxStatus genxEndDocument(genxWriter w) +{ + if (w->sequence != SEQUENCE_POST_DOC) + return w->status = GENX_SEQUENCE_ERROR; + + /* Write a newline after the closing tag. 
*/ + SendCheck (w, "\n"); + + if ((w->status = (*w->sender->flush)(w->userData)) != GENX_SUCCESS) + return w->status; + + w->sequence = SEQUENCE_NO_DOC; + return GENX_SUCCESS; +} + +genxStatus genxXmlDeclaration(genxWriter w, + constUtf8 ver, + constUtf8 enc, + constUtf8 stl) +{ + if (w->sequence != SEQUENCE_PRE_DOC) + return w->status = GENX_SEQUENCE_ERROR; + + if ((w->status = genxCheckText(w, ver)) != GENX_SUCCESS) + return w->status; + + if (enc != NULL && (w->status = genxCheckText(w, enc)) != GENX_SUCCESS) + return w->status; + + if (stl != NULL && (w->status = genxCheckText(w, stl)) != GENX_SUCCESS) + return w->status; + + SendCheck (w, "\n"); + + return GENX_SUCCESS; +} + +genxStatus genxComment(genxWriter w, constUtf8 text) +{ + size_t i; + + if (w->sequence == SEQUENCE_NO_DOC || + w->sequence == SEQUENCE_START_ATTR) + return w->status = GENX_SEQUENCE_ERROR; + + if ((w->status = genxCheckText(w, text)) != GENX_SUCCESS) + return w->status; + + /* no leading '-', no trailing '-', no '--' */ + if (text[0] == '-') + return w->status = GENX_MALFORMED_COMMENT; + for (i = 0; text[i]; i++) + if (text[i] == '-' && (text[i + 1] == '-' || text[i + 1] == 0)) + return w->status = GENX_MALFORMED_COMMENT; + + if (w->sequence == SEQUENCE_START_TAG || + w->sequence == SEQUENCE_ATTRIBUTES) + { + if ((w->status = writeStartTag(w, False)) != GENX_SUCCESS) + return w->status; + w->sequence = SEQUENCE_CONTENT; + } + + else if (w->sequence == SEQUENCE_POST_DOC) + if ((w->status = sendx(w, (utf8) "\n")) != GENX_SUCCESS) + return w->status; + + if ((w->status = sendx(w, (utf8) "")) != GENX_SUCCESS) + return w->status; + + if (w->sequence == SEQUENCE_PRE_DOC) + if ((w->status = sendx(w, (utf8) "\n")) != GENX_SUCCESS) + return w->status; + + return GENX_SUCCESS; +} + +genxStatus genxPI(genxWriter w, constUtf8 target, constUtf8 text) +{ + size_t i; + + if (w->sequence == SEQUENCE_NO_DOC || + w->sequence == SEQUENCE_START_ATTR) + return w->status = GENX_SEQUENCE_ERROR; + + if 
((w->status = genxCheckText(w, target)) != GENX_SUCCESS) + return w->status; + if ((w->status = checkNCName(w, target)) != GENX_SUCCESS) + return w->status; + if ((strlen((const char *) target) >= 3) && + (target[0] == 'x' || target[0] == 'X') && + (target[1] == 'm' || target[1] == 'M') && + (target[2] == 'l' || target[2] == 'L') && + (target[3] == 0)) + return w->status = GENX_XML_PI_TARGET; + + if ((w->status = genxCheckText(w, text)) != GENX_SUCCESS) + return w->status; + + /* no ?> within */ + for (i = 1; text[i]; i++) + if (text[i] == '>' && text[i - 1] == '?') + return w->status = GENX_MALFORMED_PI; + + if (w->sequence == SEQUENCE_START_TAG || + w->sequence == SEQUENCE_ATTRIBUTES) + { + if ((w->status = writeStartTag(w, False)) != GENX_SUCCESS) + return w->status; + w->sequence = SEQUENCE_CONTENT; + } + + else if (w->sequence == SEQUENCE_POST_DOC) + if ((w->status = sendx(w, (utf8) "\n")) != GENX_SUCCESS) + return w->status; + + if ((w->status = sendx(w, (utf8) "status; + if ((w->status = sendx(w, target)) != GENX_SUCCESS) + return w->status; + if (text[0]) + { + if ((w->status = sendx(w, (utf8) " ")) != GENX_SUCCESS) + return w->status; + if ((w->status = sendx(w, text)) != GENX_SUCCESS) + return w->status; + } + if ((w->status = sendx(w, (utf8) "?>")) != GENX_SUCCESS) + return w->status; + + if (w->sequence == SEQUENCE_PRE_DOC) + if ((w->status = sendx(w, (utf8) "\n")) != GENX_SUCCESS) + return w->status; + + return GENX_SUCCESS; +} + +/******************************* + * Literal versions of the writing routines + */ +genxStatus genxStartElementLiteral(genxWriter w, + constUtf8 xmlns, constUtf8 type) +{ + genxNamespace ns = NULL; + genxElement e; + + if (xmlns) + { + ns = genxDeclareNamespace(w, xmlns, NULL, &w->status); + if (ns == NULL || w->status != GENX_SUCCESS) + return w->status; + } + e = genxDeclareElement(w, ns, type, &w->status); + if (e == NULL || w->status != GENX_SUCCESS) + return w->status; + + return genxStartElement(e); +} + +genxStatus 
genxAddAttributeLiteral(genxWriter w, constUtf8 xmlns, + constUtf8 name, constUtf8 value) +{ + genxNamespace ns = NULL; + genxAttribute a; + + if (xmlns) + { + ns = genxDeclareNamespace(w, xmlns, NULL, &w->status); + if (ns == NULL && w->status != GENX_SUCCESS) + return w->status; + } + + a = genxDeclareAttribute(w, ns, name, &w->status); + if (a == NULL || w->status != GENX_SUCCESS) + return w->status; + + return genxAddAttribute(a, value); +} + +genxStatus genxStartAttributeLiteral(genxWriter w, + constUtf8 xmlns, constUtf8 name) +{ + genxNamespace ns = NULL; + genxAttribute a; + + if (xmlns) + { + ns = genxDeclareNamespace(w, xmlns, NULL, &w->status); + if (ns == NULL && w->status != GENX_SUCCESS) + return w->status; + } + + a = genxDeclareAttribute(w, ns, name, &w->status); + if (a == NULL || w->status != GENX_SUCCESS) + return w->status; + + return genxStartAttribute(a); +} + +genxStatus genxAddNamespaceLiteral(genxWriter w, + constUtf8 uri, constUtf8 prefix) +{ + genxNamespace ns = genxDeclareNamespace(w, uri, prefix, &w->status); + if (ns == NULL && w->status != GENX_SUCCESS) + return w->status; + + return genxAddNamespace(ns, NULL); +} + +/* + * return version + */ +char * genxGetVersion() +{ + return GENX_VERSION; +} diff --git a/XML/src/genx.h b/XML/src/genx.h new file mode 100644 index 000000000..bdaeee1f6 --- /dev/null +++ b/XML/src/genx.h @@ -0,0 +1,343 @@ +/* + * genx - C-callable library for generating XML documents + */ + +/* + * Copyright (c) 2007-2013 Code Synthesis Tools CC. + * Copyright (c) 2004 by Tim Bray and Sun Microsystems. + * + * For copying permission, see the accompanying COPYING file. 
+ */ + +#ifndef GENX_H +#define GENX_H + +#include /* size_t */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Note on error handling: genx routines mostly return + * GENX_SUCCESS (guaranteed to be zero) in normal circumstances, one of + * these other GENX_ values on a memory allocation or I/O failure or if the + * call would result in non-well-formed output. + * You can associate an error message with one of these codes explicitly + * or with the most recent error using genxGetErrorMessage() and + * genxLastErrorMessage(); see below. + */ +typedef enum +{ + GENX_SUCCESS = 0, + GENX_BAD_UTF8, + GENX_NON_XML_CHARACTER, + GENX_BAD_NAME, + GENX_ALLOC_FAILED, + GENX_BAD_NAMESPACE_NAME, + GENX_INTERNAL_ERROR, + GENX_DUPLICATE_PREFIX, + GENX_SEQUENCE_ERROR, + GENX_NO_START_TAG, + GENX_IO_ERROR, + GENX_MISSING_VALUE, + GENX_MALFORMED_COMMENT, + GENX_XML_PI_TARGET, + GENX_MALFORMED_PI, + GENX_DUPLICATE_ATTRIBUTE, + GENX_ATTRIBUTE_IN_DEFAULT_NAMESPACE, + GENX_DUPLICATE_NAMESPACE, + GENX_BAD_DEFAULT_DECLARATION +} genxStatus; + +/* character types */ +#define GENX_XML_CHAR 1 +#define GENX_LETTER 2 +#define GENX_NAMECHAR 4 + +/* The size of the character table. Valid values are 0x100 (first 255 + chars are checked) and 0x10000 (all chars are checked). */ +#ifndef GENX_CHAR_TABLE_SIZE +# define GENX_CHAR_TABLE_SIZE 0x100 +#endif + +/* a UTF-8 string */ +typedef unsigned char * utf8; +typedef const unsigned char * constUtf8; + +/* + * genx's own types + */ +typedef struct genxWriter_rec * genxWriter; +typedef struct genxNamespace_rec * genxNamespace; +typedef struct genxElement_rec * genxElement; +typedef struct genxAttribute_rec * genxAttribute; + +typedef void * (*genxAlloc) (void * userData, size_t bytes); +typedef void (*genxDealloc) (void * userData, void* data); + +/* + * Constructors, set/get + */ + +/* + * Create a new writer. For generating multiple XML documents, it's most + * efficient to re-use the same genx object. 
However, you can only write + * one document at a time with a writer. + * Returns NULL if it fails, which can only be due to an allocation failure. + */ +genxWriter genxNew(genxAlloc alloc, genxDealloc dealloc, void * userData); + +/* + * Reset the writer object back into usable state after an error or + * interruption. + */ +genxStatus genxReset (genxWriter w); + +/* + * Dispose of a writer, freeing all associated memory + */ +void genxDispose(genxWriter w); + +/* + * Set/get + */ + +/* + * The userdata pointer will be passed to memory-allocation + * and I/O callbacks. If not set, genx will pass NULL + */ +void genxSetUserData(genxWriter w, void * userData); +void * genxGetUserData(genxWriter w); + +/* + * Set/get pretty-printing. If indentation is set to 0, then no pretty- + * printing is performed. + */ +genxStatus genxSetPrettyPrint(genxWriter w, int indentation); +int genxGetPrettyPrint(genxWriter w); + +/* + * Set/get canonicalization. If true, then output explicit closing + * tags and sort attributes. Default is false. + */ +genxStatus genxSetCanonical(genxWriter w, int flag); +int genxGetCanonical(genxWriter w); + +/* + * User-provided memory allocator, if desired. For example, if you were + * in an Apache module, you could arrange for genx to use ap_palloc by + * making the pool accessible via the userData call. + * The "dealloc" is to be used to free memory allocated with "alloc". If + * alloc is provided but dealloc is NULL, genx will not attempt to free + * the memory; this would be appropriate in an Apache context. 
+ * If "alloc" is not provided, genx routines use malloc() to allocate memory + */ +void genxSetAlloc(genxWriter w, genxAlloc alloc); +void genxSetDealloc(genxWriter w, genxDealloc dealloc); +genxAlloc genxGetAlloc(genxWriter w); +genxDealloc genxGetDealloc(genxWriter w); + +/* + * Get the prefix associated with a namespace. + */ +utf8 genxGetNamespacePrefix(genxNamespace ns); + +/* + * Declaration functions + */ + +/* + * Declare a namespace. The provided prefix is the default but can be + * overridden by genxAddNamespace. If no default prefix is provided, + * genx will generate one of the form g-%d. + * On error, returns NULL and signals via statusP. + */ +genxNamespace genxDeclareNamespace(genxWriter w, + constUtf8 uri, constUtf8 prefix, + genxStatus * statusP); + +/* + * Declare an element. + * If something failed, returns NULL and sets the status code via statusP. + */ +genxElement genxDeclareElement(genxWriter w, + genxNamespace ns, constUtf8 type, + genxStatus * statusP); + +/* + * Declare an attribute. + * NOTE(review): presumably returns NULL and sets the status code via + * statusP on failure, like genxDeclareElement - confirm. + */ +genxAttribute genxDeclareAttribute(genxWriter w, + genxNamespace ns, + constUtf8 name, genxStatus * statusP); + +/* + * Writing XML + */ + +/* + * Caller-provided I/O package. + * The first form, send, is for a null-terminated string. + * For the second, sendBounded, if you have s="abcdef" and want to send + * "abc", you'd call sendBounded(userData, s, s + 3); that is, end points + * one past the last byte to send. + */ +typedef struct +{ + genxStatus (* send)(void * userData, constUtf8 s); + genxStatus (* sendBounded)(void * userData, constUtf8 start, constUtf8 end); + genxStatus (* flush)(void * userData); +} genxSender; + +/* + * NOTE(review): presumably starts a document whose output is delivered + * through the callbacks in sender - confirm against the implementation. + */ +genxStatus genxStartDocSender(genxWriter w, genxSender * sender); + +/* + * End a document. Calls "flush". + */ +genxStatus genxEndDocument(genxWriter w); + +/* + * Write XML declaration. If encoding or standalone are NULL, then those + * attributes are omitted. 
+ */ +genxStatus genxXmlDeclaration(genxWriter w, + constUtf8 version, + constUtf8 encoding, + constUtf8 standalone); +/* + * Write a comment. + */ +genxStatus genxComment(genxWriter w, constUtf8 text); + +/* + * Write a processing instruction (PI). + */ +genxStatus genxPI(genxWriter w, constUtf8 target, constUtf8 text); + +/* + * Start an element. + */ +genxStatus genxStartElementLiteral(genxWriter w, + constUtf8 xmlns, constUtf8 type); + +/* + * Start a predeclared element + * - element must have been declared + */ +genxStatus genxStartElement(genxElement e); + +/* + * Write an attribute. + */ +genxStatus genxAddAttributeLiteral(genxWriter w, constUtf8 xmlns, + constUtf8 name, constUtf8 value); + +/* + * Start an attribute. + */ +genxStatus genxStartAttributeLiteral(genxWriter w, + constUtf8 xmlns, constUtf8 name); + +/* + * Write a predeclared attribute. + */ +genxStatus genxAddAttribute(genxAttribute a, constUtf8 value); + +/* + * Start a predeclared attribute. + */ +genxStatus genxStartAttribute(genxAttribute a); + +/* + * End an attribute. + */ +genxStatus genxEndAttribute(genxWriter w); + +/* + * Add a namespace declaration. + */ +genxStatus genxAddNamespaceLiteral(genxWriter w, + constUtf8 uri, constUtf8 prefix); + +/* + * Add a predeclared namespace declaration. + */ +genxStatus genxAddNamespace(genxNamespace ns, constUtf8 prefix); + +/* + * Clear default namespace declaration. + */ +genxStatus genxUnsetDefaultNamespace(genxWriter w); + +/* + * Write an end tag. + */ +genxStatus genxEndElement(genxWriter w); + +/* + * Write some text. + * You can't write any text outside the root element, except with + * genxComment and genxPI. + * NOTE(review): presumably genxAddText takes a null-terminated string, + * genxAddCountedText takes byteCount bytes from start, and + * genxAddBoundedText takes the bytes from start up to end - confirm. + */ +genxStatus genxAddText(genxWriter w, constUtf8 start); +genxStatus genxAddCountedText(genxWriter w, constUtf8 start, size_t byteCount); +genxStatus genxAddBoundedText(genxWriter w, constUtf8 start, constUtf8 end); + +/* + * Write one character. The integer value is the Unicode character + * value, as usually expressed in U+XXXX notation. 
+ */ +genxStatus genxAddCharacter(genxWriter w, int c); + +/* + * Utility routines + */ + +/* + * Return the Unicode character encoded by the UTF-8 pointed-to by the + * argument, and advance the argument past the encoding of the character. + * Returns -1 if the UTF-8 is malformed, in which case advances the + * argument to point at the first byte past the point past the malformed + * ones. + */ +int genxNextUnicodeChar(constUtf8 * sp); + +/* + * Scan a buffer allegedly full of UTF-8 encoded XML characters; return + * one of GENX_SUCCESS, GENX_BAD_UTF8, or GENX_NON_XML_CHARACTER + */ +genxStatus genxCheckText(genxWriter w, constUtf8 s); + +/* + * return character status, the OR of GENX_XML_CHAR, + * GENX_LETTER, and GENX_NAMECHAR + */ +int genxCharClass(genxWriter w, int c); + +/* + * Silently wipe any non-XML characters out of a chunk of text. + * If you call this on a string before you pass it addText or + * addAttribute, you will never get an error from genx unless + * (a) there's a bug in your software, e.g. a malformed element name, or + * (b) there's a memory allocation or I/O error + * The output can never be longer than the input. + * Returns true if any changes were made. + */ +int genxScrubText(genxWriter w, constUtf8 in, utf8 out); + +/* + * return error messages + */ +char * genxGetErrorMessage(genxWriter w, genxStatus status); +char * genxLastErrorMessage(genxWriter w); + +/* + * return version + */ +char * genxGetVersion(); + +#ifdef __cplusplus +} +#endif + +#endif /* GENX_H */