From cd6422fde3301c1a14a2d13bf819c562044c7c3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnter=20Obiltschnig?= Date: Sun, 23 May 2021 21:23:28 +0200 Subject: [PATCH] expose million laughs attack protection implemented by Expat 2.4 --- XML/include/Poco/SAX/SAXParser.h | 22 ++++-- XML/include/Poco/XML/ParserEngine.h | 86 ++++++++++++++-------- XML/src/ParserEngine.cpp | 107 ++++++++++++++++++++-------- XML/src/SAXParser.cpp | 9 ++- 4 files changed, 157 insertions(+), 67 deletions(-) diff --git a/XML/include/Poco/SAX/SAXParser.h b/XML/include/Poco/SAX/SAXParser.h index 587e61c59..2fefc6702 100644 --- a/XML/include/Poco/SAX/SAXParser.h +++ b/XML/include/Poco/SAX/SAXParser.h @@ -28,7 +28,7 @@ namespace XML { class XML_API SAXParser: public XMLReader - /// This class provides a SAX2 (Simple API for XML) interface to expat, + /// This class provides a SAX2 (Simple API for XML) interface to expat, /// the XML parser toolkit. /// The following SAX2 features and properties are supported: /// * http://xml.org/sax/features/external-general-entities @@ -41,6 +41,14 @@ class XML_API SAXParser: public XMLReader /// The following proprietary extensions are supported: /// * http://www.appinf.com/features/enable-partial-reads -- /// see ParserEngine::setEnablePartialReads() + /// * http://www.appinf.com/properties/bla-maximum-amplification + /// see ParserEngine::setBillionLaughsAttackProtectionMaximumAmplification(); + /// argument must be a float >= 1.0 formatted as string; + /// property is set-only. + /// * http://www.appinf.com/properties/bla-activation-threshold + /// see ParserEngine::setBillionLaughsAttackProtectionActivationThreshold(); + /// argument must be a 64-bit unsigned integer formatted as string; + /// property is set-only. { public: SAXParser(); @@ -48,14 +56,14 @@ public: SAXParser(const XMLString& encoding); /// Creates an SAXParser with the given encoding. - + ~SAXParser(); /// Destroys the SAXParser. - + void setEncoding(const XMLString& encoding); /// Sets the encoding used by the parser if no /// encoding is specified in the XML document. - + const XMLString& getEncoding() const; /// Returns the name of the encoding used by /// the parser if no encoding is specified in @@ -81,11 +89,13 @@ public: void parse(InputSource* pSource); void parse(const XMLString& systemId); void parseMemoryNP(const char* xml, std::size_t size); - + /// Extensions void parseString(const std::string& xml); - + static const XMLString FEATURE_PARTIAL_READS; + static const XMLString PROPERTY_BLA_MAXIMUM_AMPLIFICATION; + static const XMLString PROPERTY_BLA_ACTIVATION_THRESHOLD; protected: void setupParse(); diff --git a/XML/include/Poco/XML/ParserEngine.h b/XML/include/Poco/XML/ParserEngine.h index 665d378f9..e0c845500 100644 --- a/XML/include/Poco/XML/ParserEngine.h +++ b/XML/include/Poco/XML/ParserEngine.h @@ -47,28 +47,28 @@ class ContextLocator; class XML_API ParserEngine: public Locator - /// This class provides an object-oriented, stream-based, + /// This class provides an object-oriented, stream-based, /// low-level interface to the XML Parser Toolkit (expat). /// It is strongly recommended, that you use the /// SAX parser classes (which are based on this - /// class) instead of this class, since they provide + /// class) instead of this class, since they provide /// a standardized, higher-level interface to the parser. { public: ParserEngine(); /// Creates the parser engine. - + ParserEngine(const XMLString& encoding); /// Creates the parser engine and passes the encoding /// to the underlying parser. - + ~ParserEngine(); /// Destroys the parser. void setEncoding(const XMLString& encoding); /// Sets the encoding used by expat. The encoding must be /// set before parsing begins, otherwise it will be ignored. - + const XMLString& getEncoding() const; /// Returns the encoding used by expat. @@ -80,33 +80,33 @@ public: /// The parser takes ownership of the strategy object /// and deletes it when it's no longer needed. /// The default is NoNamespacesStrategy. - + NamespaceStrategy* getNamespaceStrategy() const; /// Returns the NamespaceStrategy currently in use. void setExpandInternalEntities(bool flag = true); /// Enables/disables expansion of internal entities (enabled by - /// default). If entity expansion is disabled, internal entities + /// default). If entity expansion is disabled, internal entities /// are reported via the default handler. /// Must be set before parsing begins, otherwise it will be /// ignored. - + bool getExpandInternalEntities() const; /// Returns true if internal entities will be expanded automatically, /// which is the default. void setExternalGeneralEntities(bool flag = true); /// Enable or disable processing of external general entities. - + bool getExternalGeneralEntities() const; /// Returns true if external general entities will be processed; false otherwise. void setExternalParameterEntities(bool flag = true); /// Enable or disable processing of external parameter entities. - + bool getExternalParameterEntities() const; /// Returns true if external parameter entities will be processed; false otherwise. - + void setEntityResolver(EntityResolver* pResolver); /// Allow an application to register an entity resolver. @@ -121,7 +121,7 @@ public: void setDeclHandler(DeclHandler* pDeclHandler); /// Allow an application to register a DTD declarations event handler. - + DeclHandler* getDeclHandler() const; /// Return the current DTD declarations handler. @@ -133,7 +133,7 @@ public: void setLexicalHandler(LexicalHandler* pLexicalHandler); /// Allow an application to register a lexical event handler. - + LexicalHandler* getLexicalHandler() const; /// Return the current lexical handler. @@ -142,12 +142,12 @@ public: ErrorHandler* getErrorHandler() const; /// Return the current error handler. - + void setEnablePartialReads(bool flag = true); /// Enable or disable partial reads from the input source. /// /// This is useful for parsing XML from a socket stream for - /// a protocol like XMPP, where basically single elements + /// a protocol like XMPP, where basically single elements /// are read one at a time from the input source's stream, and /// following elements depend upon responses sent back to /// the peer. @@ -158,21 +158,44 @@ public: /// This allows for efficient parsing of "complete" XML documents, /// but fails in a case such as XMPP, where only XML fragments /// are sent at a time. - + bool getEnablePartialReads() const; /// Returns true if partial reads are enabled (see /// setEnablePartialReads()), false otherwise. - + + void setBillionLaughsAttackProtectionMaximumAmplification(float maximumAmplificationFactor); + /// Sets the maximum tolerated amplification factor + /// for protection against Billion Laughs Attacks. + /// + /// The amplification factor is calculated as: + /// amplification := (direct + indirect) / direct + /// while parsing, whereas: + /// - direct is the number of bytes read from the primary document in parsing and + /// - indirect is the number of bytes added by expanding entities and reading of + /// external DTD files, combined. + /// + /// maximumAmplificationFactor must be non-NaN and greater than or equal to 1.0. + /// + /// Requires an underlying Expat version >= 2.4.0. + + void setBillionLaughsAttackProtectionActivationThreshold(Poco::UInt64 activationThresholdBytes); + /// Sets number of output bytes (including amplification from entity expansion and reading DTD files) + /// needed to activate protection against Billion Laughs Attacks. + /// + /// Defaults to 8 MiB. + /// + /// Requires an underlying Expat version >= 2.4.0. + void parse(InputSource* pInputSource); /// Parse an XML document from the given InputSource. - + void parse(const char* pBuffer, std::size_t size); /// Parses an XML document from the given buffer. - + // Locator XMLString getPublicId() const; /// Return the public identifier for the current document event. - + XMLString getSystemId() const; /// Return the system identifier for the current document event. @@ -180,7 +203,7 @@ public: /// Return the line number where the current document event ends. int getColumnNumber() const; - /// Return the column number where the current document event ends. + /// Return the column number where the current document event ends. protected: void init(); @@ -191,7 +214,7 @@ protected: void parseCharInputStream(XMLCharInputStream& istr); /// Parses an entity from the given stream. - + std::streamsize readBytes(XMLByteInputStream& istr, char* pBuffer, std::streamsize bufferSize); /// Reads at most bufferSize bytes from the given stream into the given buffer. @@ -213,10 +236,10 @@ protected: void pushContext(XML_Parser parser, InputSource* pInputSource); /// Pushes a new entry to the context stack. - + void popContext(); /// Pops the top-most entry from the context stack. - + void resetContext(); /// Resets and clears the context stack. @@ -240,7 +263,7 @@ protected: static void handleEndNamespaceDecl(void* userData, const XML_Char* prefix); static void handleStartDoctypeDecl(void* userData, const XML_Char* doctypeName, const XML_Char *systemId, const XML_Char* publicId, int hasInternalSubset); static void handleEndDoctypeDecl(void* userData); - static void handleEntityDecl(void *userData, const XML_Char *entityName, int isParamEntity, const XML_Char *value, int valueLength, + static void handleEntityDecl(void *userData, const XML_Char *entityName, int isParamEntity, const XML_Char *value, int valueLength, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName); static void handleExternalParsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId); static void handleInternalParsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* replacementText, int replacementTextLength); @@ -248,14 +271,14 @@ protected: // encoding support static int convert(void *data, const char *s); - + private: typedef std::map EncodingMap; typedef std::vector ContextStack; - + XML_Parser _parser; char* _pBuffer; - bool _encodingSpecified; + bool _encodingSpecified; XMLString _encoding; bool _expandInternalEntities; bool _externalGeneralEntities; @@ -264,14 +287,17 @@ private: NamespaceStrategy* _pNamespaceStrategy; EncodingMap _encodings; ContextStack _context; - + EntityResolver* _pEntityResolver; DTDHandler* _pDTDHandler; DeclHandler* _pDeclHandler; ContentHandler* _pContentHandler; LexicalHandler* _pLexicalHandler; ErrorHandler* _pErrorHandler; - + + float _maximumAmplificationFactor; + Poco::UInt64 _activationThresholdBytes; + static const int PARSE_BUFFER_SIZE; static const XMLString EMPTY_STRING; }; diff --git a/XML/src/ParserEngine.cpp b/XML/src/ParserEngine.cpp index 137d39db0..82d477478 100644 --- a/XML/src/ParserEngine.cpp +++ b/XML/src/ParserEngine.cpp @@ -47,16 +47,16 @@ public: _systemId(systemId) { } - + ~ContextLocator() { } - + XMLString getPublicId() const { return _publicId; } - + XMLString getSystemId() const { return _systemId; @@ -66,12 +66,12 @@ public: { return XML_GetCurrentLineNumber(_parser); } - + int getColumnNumber() const { return XML_GetCurrentColumnNumber(_parser); } - + private: XML_Parser _parser; XMLString _publicId; @@ -97,7 +97,9 @@ ParserEngine::ParserEngine(): _pDeclHandler(0), _pContentHandler(0), _pLexicalHandler(0), - _pErrorHandler(0) + _pErrorHandler(0), + _maximumAmplificationFactor(0.0), + _activationThresholdBytes(0) { } @@ -117,7 +119,9 @@ ParserEngine::ParserEngine(const XMLString& encoding): _pDeclHandler(0), _pContentHandler(0), _pLexicalHandler(0), - _pErrorHandler(0) + _pErrorHandler(0), + _maximumAmplificationFactor(0.0), + _activationThresholdBytes(0) { } @@ -145,14 +149,14 @@ void ParserEngine::addEncoding(const XMLString& name, TextEncoding* pEncoding) if (_encodings.find(name) == _encodings.end()) _encodings[name] = pEncoding; else - throw XMLException("Encoding already defined"); + throw XMLException("Encoding already defined"); } void ParserEngine::setNamespaceStrategy(NamespaceStrategy* pStrategy) { poco_check_ptr (pStrategy); - + delete _pNamespaceStrategy; _pNamespaceStrategy = pStrategy; } @@ -218,6 +222,18 @@ void ParserEngine::setEnablePartialReads(bool flag) } +void ParserEngine::setBillionLaughsAttackProtectionMaximumAmplification(float maximumAmplificationFactor) +{ + _maximumAmplificationFactor = maximumAmplificationFactor; +} + + +void ParserEngine::setBillionLaughsAttackProtectionActivationThreshold(Poco::UInt64 activationThresholdBytes) +{ + _activationThresholdBytes = activationThresholdBytes; +} + + void ParserEngine::parse(InputSource* pInputSource) { init(); @@ -267,7 +283,7 @@ void ParserEngine::parseByteInputStream(XMLByteInputStream& istr) handleError(XML_GetErrorCode(_parser)); if (istr.good()) n = readBytes(istr, _pBuffer, PARSE_BUFFER_SIZE); - else + else n = 0; } if (!XML_Parse(_parser, _pBuffer, 0, 1)) @@ -284,7 +300,7 @@ void ParserEngine::parseCharInputStream(XMLCharInputStream& istr) handleError(XML_GetErrorCode(_parser)); if (istr.good()) n = readChars(istr, reinterpret_cast(_pBuffer), PARSE_BUFFER_SIZE/sizeof(XMLChar)); - else + else n = 0; } if (!XML_Parse(_parser, _pBuffer, 0, 1)) @@ -316,7 +332,7 @@ void ParserEngine::parseExternalByteInputStream(XML_Parser extParser, XMLByteInp handleError(XML_GetErrorCode(extParser)); if (istr.good()) n = readBytes(istr, pBuffer, PARSE_BUFFER_SIZE); - else + else n = 0; } if (!XML_Parse(extParser, pBuffer, 0, 1)) @@ -343,7 +359,7 @@ void ParserEngine::parseExternalCharInputStream(XML_Parser extParser, XMLCharInp handleError(XML_GetErrorCode(extParser)); if (istr.good()) n = readChars(istr, pBuffer, static_cast(PARSE_BUFFER_SIZE/sizeof(XMLChar))); - else + else n = 0; } if (!XML_Parse(extParser, reinterpret_cast(pBuffer), 0, 1)) @@ -487,6 +503,17 @@ void ParserEngine::init() XML_SetSkippedEntityHandler(_parser, handleSkippedEntity); XML_SetParamEntityParsing(_parser, _externalParameterEntities ? XML_PARAM_ENTITY_PARSING_ALWAYS : XML_PARAM_ENTITY_PARSING_NEVER); XML_SetUnknownEncodingHandler(_parser, handleUnknownEncoding, this); + +#if XML_MAJOR_VERSION > 2 || (XML_MAJOR_VERSION == 2 && XML_MINOR_VERSION >= 4) + if (_maximumAmplificationFactor > 1.0) + { + XML_SetBillionLaughsAttackProtectionMaximumAmplification(_parser, _maximumAmplificationFactor); + } + if (_activationThresholdBytes > 0) + { + XML_SetBillionLaughsAttackProtectionActivationThreshold(_parser, _activationThresholdBytes); + } +#endif } @@ -541,7 +568,7 @@ void ParserEngine::handleError(int errorNo) case XML_ERROR_NOT_STANDALONE: throw SAXParseException("Document is not standalone", locator()); case XML_ERROR_UNEXPECTED_STATE: - throw SAXParseException("Unexpected parser state - please send a bug report", locator()); + throw SAXParseException("Unexpected parser state - please send a bug report", locator()); case XML_ERROR_ENTITY_DECLARED_IN_PE: throw SAXParseException("Entity declared in parameter entity", locator()); case XML_ERROR_FEATURE_REQUIRES_XML_DTD: @@ -570,6 +597,26 @@ void ParserEngine::handleError(int errorNo) throw SAXParseException("Parsing finished", locator()); case XML_ERROR_SUSPEND_PE: throw SAXParseException("Cannot suspend in external parameter entity", locator()); +#if XML_MAJOR_VERSION >= 2 + case XML_ERROR_RESERVED_PREFIX_XML: + throw SAXParseException("Reserved prefix 'xml' must not be undeclared or bound to another namespace name", locator()); + case XML_ERROR_RESERVED_PREFIX_XMLNS: + throw SAXParseException("Reserved prefix 'xmlns' must not be declared or undeclared", locator()); + case XML_ERROR_RESERVED_NAMESPACE_URI: + throw SAXParseException("Prefix must not be bound to one of the reserved namespace names", locator()); + #if XML_MAJOR_VERSION > 2 || XML_MINOR_VERSION >= 1 + case XML_ERROR_INVALID_ARGUMENT: + throw SAXParseException("Invalid argument", locator()); + #endif + #if XML_MAJOR_VERSION > 2 || XML_MINOR_VERSION >= 3 + case XML_ERROR_NO_BUFFER: + throw SAXParseException("Internal error: a successful prior call to function XML_GetBuffer is required", locator()); + #endif + #if XML_MAJOR_VERSION > 2 || XML_MINOR_VERSION >= 4 + case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: + throw SAXParseException("Limit on input amplification factor (from DTD and entities) breached", locator()); + #endif +#endif // XML_MAJOR_VERSION } throw XMLException("Unknown Expat error code"); } @@ -583,7 +630,7 @@ void ParserEngine::handleError(int errorNo) if (_pErrorHandler) _pErrorHandler->fatalError(SAXParseException("Fatal error", locator(), exc)); throw; } -} +} void ParserEngine::pushContext(XML_Parser parser, InputSource* pInputSource) @@ -614,12 +661,12 @@ void ParserEngine::resetContext() void ParserEngine::handleStartElement(void* userData, const XML_Char* name, const XML_Char** atts) { ParserEngine* pThis = reinterpret_cast(userData); - + if (pThis->_pContentHandler) { try { - pThis->_pNamespaceStrategy->startElement(name, atts, XML_GetSpecifiedAttributeCount(pThis->_parser)/2, pThis->_pContentHandler); + pThis->_pNamespaceStrategy->startElement(name, atts, XML_GetSpecifiedAttributeCount(pThis->_parser)/2, pThis->_pContentHandler); } catch (XMLException& exc) { @@ -632,12 +679,12 @@ void ParserEngine::handleStartElement(void* userData, const XML_Char* name, cons void ParserEngine::handleEndElement(void* userData, const XML_Char* name) { ParserEngine* pThis = reinterpret_cast(userData); - + if (pThis->_pContentHandler) { try { - pThis->_pNamespaceStrategy->endElement(name, pThis->_pContentHandler); + pThis->_pNamespaceStrategy->endElement(name, pThis->_pContentHandler); } catch (XMLException& exc) { @@ -650,7 +697,7 @@ void ParserEngine::handleEndElement(void* userData, const XML_Char* name) void ParserEngine::handleCharacterData(void* userData, const XML_Char* s, int len) { ParserEngine* pThis = reinterpret_cast(userData); - + if (pThis->_pContentHandler) pThis->_pContentHandler->characters(s, 0, len); } @@ -659,7 +706,7 @@ void ParserEngine::handleCharacterData(void* userData, const XML_Char* s, int le void ParserEngine::handleProcessingInstruction(void* userData, const XML_Char* target, const XML_Char* data) { ParserEngine* pThis = reinterpret_cast(userData); - + if (pThis->_pContentHandler) pThis->_pContentHandler->processingInstruction(target, data); } @@ -673,10 +720,10 @@ void ParserEngine::handleDefault(void* userData, const XML_Char* s, int len) void ParserEngine::handleUnparsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId, const XML_Char* notationName) { ParserEngine* pThis = reinterpret_cast(userData); - + XMLString pubId; if (publicId) pubId.assign(publicId); - if (pThis->_pDTDHandler) + if (pThis->_pDTDHandler) pThis->_pDTDHandler->unparsedEntityDecl(entityName, publicId ? &pubId : 0, systemId, notationName); } @@ -684,12 +731,12 @@ void ParserEngine::handleUnparsedEntityDecl(void* userData, const XML_Char* enti void ParserEngine::handleNotationDecl(void* userData, const XML_Char* notationName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId) { ParserEngine* pThis = reinterpret_cast(userData); - + XMLString pubId; if (publicId) pubId.assign(publicId); XMLString sysId; if (systemId) sysId.assign(systemId); - if (pThis->_pDTDHandler) + if (pThis->_pDTDHandler) pThis->_pDTDHandler->notationDecl(notationName, publicId ? &pubId : 0, systemId ? &sysId : 0); } @@ -708,7 +755,7 @@ int ParserEngine::handleExternalEntityRef(XML_Parser parser, const XML_Char* con XMLString sysId(systemId); XMLString pubId; if (publicId) pubId.assign(publicId); - + URI uri(fromXMLString(pThis->_context.back()->getSystemId())); uri.resolve(fromXMLString(sysId)); @@ -749,7 +796,7 @@ int ParserEngine::handleExternalEntityRef(XML_Parser parser, const XML_Char* con int ParserEngine::handleUnknownEncoding(void* encodingHandlerData, const XML_Char* name, XML_Encoding* info) { ParserEngine* pThis = reinterpret_cast(encodingHandlerData); - + XMLString encoding(name); TextEncoding* knownEncoding = 0; @@ -764,7 +811,7 @@ int ParserEngine::handleUnknownEncoding(void* encodingHandlerData, const XML_Cha const TextEncoding::CharacterMap& map = knownEncoding->characterMap(); for (int i = 0; i < 256; ++i) info->map[i] = map[i]; - + info->data = knownEncoding; info->convert = &ParserEngine::convert; info->release = 0; @@ -846,7 +893,7 @@ void ParserEngine::handleEndDoctypeDecl(void* userData) } -void ParserEngine::handleEntityDecl(void *userData, const XML_Char *entityName, int isParamEntity, const XML_Char *value, int valueLength, +void ParserEngine::handleEntityDecl(void *userData, const XML_Char *entityName, int isParamEntity, const XML_Char *value, int valueLength, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName) { if (value) @@ -880,7 +927,7 @@ void ParserEngine::handleInternalParsedEntityDecl(void* userData, const XML_Char void ParserEngine::handleSkippedEntity(void* userData, const XML_Char* entityName, int isParameterEntity) { ParserEngine* pThis = reinterpret_cast(userData); - + if (pThis->_pContentHandler) pThis->_pContentHandler->skippedEntity(entityName); } diff --git a/XML/src/SAXParser.cpp b/XML/src/SAXParser.cpp index dd9c40d28..54238b51b 100644 --- a/XML/src/SAXParser.cpp +++ b/XML/src/SAXParser.cpp @@ -17,6 +17,7 @@ #include "Poco/SAX/EntityResolverImpl.h" #include "Poco/SAX/InputSource.h" #include "Poco/XML/NamespaceStrategy.h" +#include "Poco/NumberParser.h" #include @@ -25,6 +26,8 @@ namespace XML { const XMLString SAXParser::FEATURE_PARTIAL_READS = toXMLString("http://www.appinf.com/features/enable-partial-reads"); +const XMLString SAXParser::PROPERTY_BLA_MAXIMUM_AMPLIFICATION = toXMLString("http://www.appinf.com/properties/bla-maximum-amplification"); +const XMLString SAXParser::PROPERTY_BLA_ACTIVATION_THRESHOLD = toXMLString("http://www.appinf.com/properties/bla-activation-threshold"); SAXParser::SAXParser(): @@ -52,7 +55,7 @@ void SAXParser::setEncoding(const XMLString& encoding) _engine.setEncoding(encoding); } - + const XMLString& SAXParser::getEncoding() const { return _engine.getEncoding(); @@ -153,6 +156,10 @@ void SAXParser::setProperty(const XMLString& propertyId, const XMLString& value) { if (propertyId == XMLReader::PROPERTY_DECLARATION_HANDLER || propertyId == XMLReader::PROPERTY_LEXICAL_HANDLER) throw SAXNotSupportedException(std::string("property does not take a string value: ") + fromXMLString(propertyId)); + else if (propertyId == PROPERTY_BLA_MAXIMUM_AMPLIFICATION) + _engine.setBillionLaughsAttackProtectionMaximumAmplification(static_cast(Poco::NumberParser::parseFloat(value))); + else if (propertyId == PROPERTY_BLA_ACTIVATION_THRESHOLD) + _engine.setBillionLaughsAttackProtectionActivationThreshold(Poco::NumberParser::parseUnsigned64(value)); else throw SAXNotRecognizedException(fromXMLString(propertyId)); }