Expose Billion Laughs attack protection implemented by Expat 2.4

This commit is contained in:
Günter Obiltschnig 2021-05-23 21:23:28 +02:00
parent d95d9bd4a4
commit cd6422fde3
4 changed files with 157 additions and 67 deletions

View File

@ -28,7 +28,7 @@ namespace XML {
class XML_API SAXParser: public XMLReader
/// This class provides a SAX2 (Simple API for XML) interface to expat,
/// This class provides a SAX2 (Simple API for XML) interface to expat,
/// the XML parser toolkit.
/// The following SAX2 features and properties are supported:
/// * http://xml.org/sax/features/external-general-entities
@ -41,6 +41,14 @@ class XML_API SAXParser: public XMLReader
/// The following proprietary extensions are supported:
/// * http://www.appinf.com/features/enable-partial-reads --
/// see ParserEngine::setEnablePartialReads()
/// * http://www.appinf.com/properties/bla-maximum-amplification --
/// see ParserEngine::setBillionLaughsAttackProtectionMaximumAmplification();
/// argument must be a float >= 1.0 formatted as string;
/// property is set-only.
/// * http://www.appinf.com/properties/bla-activation-threshold --
/// see ParserEngine::setBillionLaughsAttackProtectionActivationThreshold();
/// argument must be a 64-bit unsigned integer formatted as string;
/// property is set-only.
{
public:
SAXParser();
@ -48,14 +56,14 @@ public:
SAXParser(const XMLString& encoding);
/// Creates an SAXParser with the given encoding.
~SAXParser();
/// Destroys the SAXParser.
void setEncoding(const XMLString& encoding);
/// Sets the encoding used by the parser if no
/// encoding is specified in the XML document.
const XMLString& getEncoding() const;
/// Returns the name of the encoding used by
/// the parser if no encoding is specified in
@ -81,11 +89,13 @@ public:
void parse(InputSource* pSource);
void parse(const XMLString& systemId);
void parseMemoryNP(const char* xml, std::size_t size);
/// Extensions
void parseString(const std::string& xml);
static const XMLString FEATURE_PARTIAL_READS;
static const XMLString PROPERTY_BLA_MAXIMUM_AMPLIFICATION;
static const XMLString PROPERTY_BLA_ACTIVATION_THRESHOLD;
protected:
void setupParse();

View File

@ -47,28 +47,28 @@ class ContextLocator;
class XML_API ParserEngine: public Locator
/// This class provides an object-oriented, stream-based,
/// This class provides an object-oriented, stream-based,
/// low-level interface to the XML Parser Toolkit (expat).
/// It is strongly recommended, that you use the
/// SAX parser classes (which are based on this
/// class) instead of this class, since they provide
/// class) instead of this class, since they provide
/// a standardized, higher-level interface to the parser.
{
public:
ParserEngine();
/// Creates the parser engine.
ParserEngine(const XMLString& encoding);
/// Creates the parser engine and passes the encoding
/// to the underlying parser.
~ParserEngine();
/// Destroys the parser.
void setEncoding(const XMLString& encoding);
/// Sets the encoding used by expat. The encoding must be
/// set before parsing begins, otherwise it will be ignored.
const XMLString& getEncoding() const;
/// Returns the encoding used by expat.
@ -80,33 +80,33 @@ public:
/// The parser takes ownership of the strategy object
/// and deletes it when it's no longer needed.
/// The default is NoNamespacesStrategy.
NamespaceStrategy* getNamespaceStrategy() const;
/// Returns the NamespaceStrategy currently in use.
void setExpandInternalEntities(bool flag = true);
/// Enables/disables expansion of internal entities (enabled by
/// default). If entity expansion is disabled, internal entities
/// default). If entity expansion is disabled, internal entities
/// are reported via the default handler.
/// Must be set before parsing begins, otherwise it will be
/// ignored.
bool getExpandInternalEntities() const;
/// Returns true if internal entities will be expanded automatically,
/// which is the default.
void setExternalGeneralEntities(bool flag = true);
/// Enable or disable processing of external general entities.
bool getExternalGeneralEntities() const;
/// Returns true if external general entities will be processed; false otherwise.
void setExternalParameterEntities(bool flag = true);
/// Enable or disable processing of external parameter entities.
bool getExternalParameterEntities() const;
/// Returns true if external parameter entities will be processed; false otherwise.
void setEntityResolver(EntityResolver* pResolver);
/// Allow an application to register an entity resolver.
@ -121,7 +121,7 @@ public:
void setDeclHandler(DeclHandler* pDeclHandler);
/// Allow an application to register a DTD declarations event handler.
DeclHandler* getDeclHandler() const;
/// Return the current DTD declarations handler.
@ -133,7 +133,7 @@ public:
void setLexicalHandler(LexicalHandler* pLexicalHandler);
/// Allow an application to register a lexical event handler.
LexicalHandler* getLexicalHandler() const;
/// Return the current lexical handler.
@ -142,12 +142,12 @@ public:
ErrorHandler* getErrorHandler() const;
/// Return the current error handler.
void setEnablePartialReads(bool flag = true);
/// Enable or disable partial reads from the input source.
///
/// This is useful for parsing XML from a socket stream for
/// a protocol like XMPP, where basically single elements
/// a protocol like XMPP, where basically single elements
/// are read one at a time from the input source's stream, and
/// following elements depend upon responses sent back to
/// the peer.
@ -158,21 +158,44 @@ public:
/// This allows for efficient parsing of "complete" XML documents,
/// but fails in a case such as XMPP, where only XML fragments
/// are sent at a time.
bool getEnablePartialReads() const;
/// Returns true if partial reads are enabled (see
/// setEnablePartialReads()), false otherwise.
void setBillionLaughsAttackProtectionMaximumAmplification(float maximumAmplificationFactor);
/// Sets the maximum tolerated amplification factor
/// for protection against Billion Laughs Attacks.
///
/// The amplification factor is calculated as:
/// amplification := (direct + indirect) / direct
/// while parsing, whereas:
/// - direct is the number of bytes read from the primary document in parsing and
/// - indirect is the number of bytes added by expanding entities and reading of
/// external DTD files, combined.
///
/// maximumAmplificationFactor must be non-NaN and greater than or equal to 1.0.
///
/// Requires an underlying Expat version >= 2.4.0.
void setBillionLaughsAttackProtectionActivationThreshold(Poco::UInt64 activationThresholdBytes);
/// Sets the number of output bytes (including amplification from entity expansion and reading DTD files)
/// needed to activate protection against Billion Laughs Attacks.
///
/// Defaults to 8 MiB.
///
/// Requires an underlying Expat version >= 2.4.0.
void parse(InputSource* pInputSource);
/// Parse an XML document from the given InputSource.
void parse(const char* pBuffer, std::size_t size);
/// Parses an XML document from the given buffer.
// Locator
XMLString getPublicId() const;
/// Return the public identifier for the current document event.
XMLString getSystemId() const;
/// Return the system identifier for the current document event.
@ -180,7 +203,7 @@ public:
/// Return the line number where the current document event ends.
int getColumnNumber() const;
/// Return the column number where the current document event ends.
/// Return the column number where the current document event ends.
protected:
void init();
@ -191,7 +214,7 @@ protected:
void parseCharInputStream(XMLCharInputStream& istr);
/// Parses an entity from the given stream.
std::streamsize readBytes(XMLByteInputStream& istr, char* pBuffer, std::streamsize bufferSize);
/// Reads at most bufferSize bytes from the given stream into the given buffer.
@ -213,10 +236,10 @@ protected:
void pushContext(XML_Parser parser, InputSource* pInputSource);
/// Pushes a new entry to the context stack.
void popContext();
/// Pops the top-most entry from the context stack.
void resetContext();
/// Resets and clears the context stack.
@ -240,7 +263,7 @@ protected:
static void handleEndNamespaceDecl(void* userData, const XML_Char* prefix);
static void handleStartDoctypeDecl(void* userData, const XML_Char* doctypeName, const XML_Char *systemId, const XML_Char* publicId, int hasInternalSubset);
static void handleEndDoctypeDecl(void* userData);
static void handleEntityDecl(void *userData, const XML_Char *entityName, int isParamEntity, const XML_Char *value, int valueLength,
static void handleEntityDecl(void *userData, const XML_Char *entityName, int isParamEntity, const XML_Char *value, int valueLength,
const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName);
static void handleExternalParsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId);
static void handleInternalParsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* replacementText, int replacementTextLength);
@ -248,14 +271,14 @@ protected:
// encoding support
static int convert(void *data, const char *s);
private:
typedef std::map<XMLString, Poco::TextEncoding*> EncodingMap;
typedef std::vector<ContextLocator*> ContextStack;
XML_Parser _parser;
char* _pBuffer;
bool _encodingSpecified;
bool _encodingSpecified;
XMLString _encoding;
bool _expandInternalEntities;
bool _externalGeneralEntities;
@ -264,14 +287,17 @@ private:
NamespaceStrategy* _pNamespaceStrategy;
EncodingMap _encodings;
ContextStack _context;
EntityResolver* _pEntityResolver;
DTDHandler* _pDTDHandler;
DeclHandler* _pDeclHandler;
ContentHandler* _pContentHandler;
LexicalHandler* _pLexicalHandler;
ErrorHandler* _pErrorHandler;
float _maximumAmplificationFactor;
Poco::UInt64 _activationThresholdBytes;
static const int PARSE_BUFFER_SIZE;
static const XMLString EMPTY_STRING;
};

View File

@ -47,16 +47,16 @@ public:
_systemId(systemId)
{
}
// Destroys the ContextLocator. The body is empty, so no explicit cleanup is performed here.
~ContextLocator()
{
}
XMLString getPublicId() const
{
	// Returned by value, so the caller receives its own copy of the stored identifier.
	return this->_publicId;
}
XMLString getSystemId() const
{
return _systemId;
@ -66,12 +66,12 @@ public:
{
return XML_GetCurrentLineNumber(_parser);
}
int getColumnNumber() const
{
return XML_GetCurrentColumnNumber(_parser);
}
private:
XML_Parser _parser;
XMLString _publicId;
@ -97,7 +97,9 @@ ParserEngine::ParserEngine():
_pDeclHandler(0),
_pContentHandler(0),
_pLexicalHandler(0),
_pErrorHandler(0)
_pErrorHandler(0),
_maximumAmplificationFactor(0.0),
_activationThresholdBytes(0)
{
}
@ -117,7 +119,9 @@ ParserEngine::ParserEngine(const XMLString& encoding):
_pDeclHandler(0),
_pContentHandler(0),
_pLexicalHandler(0),
_pErrorHandler(0)
_pErrorHandler(0),
_maximumAmplificationFactor(0.0),
_activationThresholdBytes(0)
{
}
@ -145,14 +149,14 @@ void ParserEngine::addEncoding(const XMLString& name, TextEncoding* pEncoding)
if (_encodings.find(name) == _encodings.end())
_encodings[name] = pEncoding;
else
throw XMLException("Encoding already defined");
throw XMLException("Encoding already defined");
}
void ParserEngine::setNamespaceStrategy(NamespaceStrategy* pStrategy)
{
	// Installs pStrategy as the namespace strategy and takes ownership of it.
	// The previously installed strategy is deleted.
	// A null pStrategy is rejected by poco_check_ptr.
	poco_check_ptr (pStrategy);

	// Re-installing the strategy that is already in place must be a no-op:
	// deleting it first would leave _pNamespaceStrategy pointing at freed memory.
	if (pStrategy != _pNamespaceStrategy)
	{
		delete _pNamespaceStrategy;
		_pNamespaceStrategy = pStrategy;
	}
}
@ -218,6 +222,18 @@ void ParserEngine::setEnablePartialReads(bool flag)
}
void ParserEngine::setBillionLaughsAttackProtectionMaximumAmplification(float maximumAmplificationFactor)
{
_maximumAmplificationFactor = maximumAmplificationFactor;
}
void ParserEngine::setBillionLaughsAttackProtectionActivationThreshold(Poco::UInt64 activationThresholdBytes)
{
	// Only records the threshold; no validation happens here.
	// init() passes the stored value on to Expat when it is non-zero
	// and the code is compiled against Expat 2.4 or newer.
	this->_activationThresholdBytes = activationThresholdBytes;
}
void ParserEngine::parse(InputSource* pInputSource)
{
init();
@ -267,7 +283,7 @@ void ParserEngine::parseByteInputStream(XMLByteInputStream& istr)
handleError(XML_GetErrorCode(_parser));
if (istr.good())
n = readBytes(istr, _pBuffer, PARSE_BUFFER_SIZE);
else
else
n = 0;
}
if (!XML_Parse(_parser, _pBuffer, 0, 1))
@ -284,7 +300,7 @@ void ParserEngine::parseCharInputStream(XMLCharInputStream& istr)
handleError(XML_GetErrorCode(_parser));
if (istr.good())
n = readChars(istr, reinterpret_cast<XMLChar*>(_pBuffer), PARSE_BUFFER_SIZE/sizeof(XMLChar));
else
else
n = 0;
}
if (!XML_Parse(_parser, _pBuffer, 0, 1))
@ -316,7 +332,7 @@ void ParserEngine::parseExternalByteInputStream(XML_Parser extParser, XMLByteInp
handleError(XML_GetErrorCode(extParser));
if (istr.good())
n = readBytes(istr, pBuffer, PARSE_BUFFER_SIZE);
else
else
n = 0;
}
if (!XML_Parse(extParser, pBuffer, 0, 1))
@ -343,7 +359,7 @@ void ParserEngine::parseExternalCharInputStream(XML_Parser extParser, XMLCharInp
handleError(XML_GetErrorCode(extParser));
if (istr.good())
n = readChars(istr, pBuffer, static_cast<int>(PARSE_BUFFER_SIZE/sizeof(XMLChar)));
else
else
n = 0;
}
if (!XML_Parse(extParser, reinterpret_cast<char*>(pBuffer), 0, 1))
@ -487,6 +503,17 @@ void ParserEngine::init()
XML_SetSkippedEntityHandler(_parser, handleSkippedEntity);
XML_SetParamEntityParsing(_parser, _externalParameterEntities ? XML_PARAM_ENTITY_PARSING_ALWAYS : XML_PARAM_ENTITY_PARSING_NEVER);
XML_SetUnknownEncodingHandler(_parser, handleUnknownEncoding, this);
#if XML_MAJOR_VERSION > 2 || (XML_MAJOR_VERSION == 2 && XML_MINOR_VERSION >= 4)
if (_maximumAmplificationFactor > 1.0)
{
XML_SetBillionLaughsAttackProtectionMaximumAmplification(_parser, _maximumAmplificationFactor);
}
if (_activationThresholdBytes > 0)
{
XML_SetBillionLaughsAttackProtectionActivationThreshold(_parser, _activationThresholdBytes);
}
#endif
}
@ -541,7 +568,7 @@ void ParserEngine::handleError(int errorNo)
case XML_ERROR_NOT_STANDALONE:
throw SAXParseException("Document is not standalone", locator());
case XML_ERROR_UNEXPECTED_STATE:
throw SAXParseException("Unexpected parser state - please send a bug report", locator());
throw SAXParseException("Unexpected parser state - please send a bug report", locator());
case XML_ERROR_ENTITY_DECLARED_IN_PE:
throw SAXParseException("Entity declared in parameter entity", locator());
case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
@ -570,6 +597,26 @@ void ParserEngine::handleError(int errorNo)
throw SAXParseException("Parsing finished", locator());
case XML_ERROR_SUSPEND_PE:
throw SAXParseException("Cannot suspend in external parameter entity", locator());
#if XML_MAJOR_VERSION >= 2
case XML_ERROR_RESERVED_PREFIX_XML:
throw SAXParseException("Reserved prefix 'xml' must not be undeclared or bound to another namespace name", locator());
case XML_ERROR_RESERVED_PREFIX_XMLNS:
throw SAXParseException("Reserved prefix 'xmlns' must not be declared or undeclared", locator());
case XML_ERROR_RESERVED_NAMESPACE_URI:
throw SAXParseException("Prefix must not be bound to one of the reserved namespace names", locator());
#if XML_MAJOR_VERSION > 2 || XML_MINOR_VERSION >= 1
case XML_ERROR_INVALID_ARGUMENT:
throw SAXParseException("Invalid argument", locator());
#endif
#if XML_MAJOR_VERSION > 2 || XML_MINOR_VERSION >= 3
case XML_ERROR_NO_BUFFER:
throw SAXParseException("Internal error: a successful prior call to function XML_GetBuffer is required", locator());
#endif
#if XML_MAJOR_VERSION > 2 || XML_MINOR_VERSION >= 4
case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
throw SAXParseException("Limit on input amplification factor (from DTD and entities) breached", locator());
#endif
#endif // XML_MAJOR_VERSION
}
throw XMLException("Unknown Expat error code");
}
@ -583,7 +630,7 @@ void ParserEngine::handleError(int errorNo)
if (_pErrorHandler) _pErrorHandler->fatalError(SAXParseException("Fatal error", locator(), exc));
throw;
}
}
}
void ParserEngine::pushContext(XML_Parser parser, InputSource* pInputSource)
@ -614,12 +661,12 @@ void ParserEngine::resetContext()
void ParserEngine::handleStartElement(void* userData, const XML_Char* name, const XML_Char** atts)
{
ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData);
if (pThis->_pContentHandler)
{
try
{
pThis->_pNamespaceStrategy->startElement(name, atts, XML_GetSpecifiedAttributeCount(pThis->_parser)/2, pThis->_pContentHandler);
pThis->_pNamespaceStrategy->startElement(name, atts, XML_GetSpecifiedAttributeCount(pThis->_parser)/2, pThis->_pContentHandler);
}
catch (XMLException& exc)
{
@ -632,12 +679,12 @@ void ParserEngine::handleStartElement(void* userData, const XML_Char* name, cons
void ParserEngine::handleEndElement(void* userData, const XML_Char* name)
{
ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData);
if (pThis->_pContentHandler)
{
try
{
pThis->_pNamespaceStrategy->endElement(name, pThis->_pContentHandler);
pThis->_pNamespaceStrategy->endElement(name, pThis->_pContentHandler);
}
catch (XMLException& exc)
{
@ -650,7 +697,7 @@ void ParserEngine::handleEndElement(void* userData, const XML_Char* name)
void ParserEngine::handleCharacterData(void* userData, const XML_Char* s, int len)
{
ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData);
if (pThis->_pContentHandler)
pThis->_pContentHandler->characters(s, 0, len);
}
@ -659,7 +706,7 @@ void ParserEngine::handleCharacterData(void* userData, const XML_Char* s, int le
void ParserEngine::handleProcessingInstruction(void* userData, const XML_Char* target, const XML_Char* data)
{
ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData);
if (pThis->_pContentHandler)
pThis->_pContentHandler->processingInstruction(target, data);
}
@ -673,10 +720,10 @@ void ParserEngine::handleDefault(void* userData, const XML_Char* s, int len)
void ParserEngine::handleUnparsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId, const XML_Char* notationName)
{
	// Expat callback for unparsed entity declarations. userData is interpreted
	// as the ParserEngine; the declaration is forwarded to the DTD handler,
	// if one is registered. The base argument is not used.
	ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData);

	// publicId may be null; in that case the handler receives a null pointer
	// instead of a pointer to a string.
	XMLString pubId;
	if (publicId) pubId.assign(publicId);

	// Single handler check (the same condition was previously nested twice).
	if (pThis->_pDTDHandler)
		pThis->_pDTDHandler->unparsedEntityDecl(entityName, publicId ? &pubId : 0, systemId, notationName);
}
@ -684,12 +731,12 @@ void ParserEngine::handleUnparsedEntityDecl(void* userData, const XML_Char* enti
void ParserEngine::handleNotationDecl(void* userData, const XML_Char* notationName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId)
{
	// Expat callback for notation declarations. userData is interpreted as
	// the ParserEngine; the declaration is forwarded to the DTD handler,
	// if one is registered. The base argument is not used.
	ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData);

	// Both identifiers may be null; a null identifier is passed to the
	// handler as a null pointer instead of a pointer to a string.
	XMLString pubId;
	if (publicId) pubId.assign(publicId);
	XMLString sysId;
	if (systemId) sysId.assign(systemId);

	// Single handler check (the same condition was previously nested twice).
	if (pThis->_pDTDHandler)
		pThis->_pDTDHandler->notationDecl(notationName, publicId ? &pubId : 0, systemId ? &sysId : 0);
}
@ -708,7 +755,7 @@ int ParserEngine::handleExternalEntityRef(XML_Parser parser, const XML_Char* con
XMLString sysId(systemId);
XMLString pubId;
if (publicId) pubId.assign(publicId);
URI uri(fromXMLString(pThis->_context.back()->getSystemId()));
uri.resolve(fromXMLString(sysId));
@ -749,7 +796,7 @@ int ParserEngine::handleExternalEntityRef(XML_Parser parser, const XML_Char* con
int ParserEngine::handleUnknownEncoding(void* encodingHandlerData, const XML_Char* name, XML_Encoding* info)
{
ParserEngine* pThis = reinterpret_cast<ParserEngine*>(encodingHandlerData);
XMLString encoding(name);
TextEncoding* knownEncoding = 0;
@ -764,7 +811,7 @@ int ParserEngine::handleUnknownEncoding(void* encodingHandlerData, const XML_Cha
const TextEncoding::CharacterMap& map = knownEncoding->characterMap();
for (int i = 0; i < 256; ++i)
info->map[i] = map[i];
info->data = knownEncoding;
info->convert = &ParserEngine::convert;
info->release = 0;
@ -846,7 +893,7 @@ void ParserEngine::handleEndDoctypeDecl(void* userData)
}
void ParserEngine::handleEntityDecl(void *userData, const XML_Char *entityName, int isParamEntity, const XML_Char *value, int valueLength,
void ParserEngine::handleEntityDecl(void *userData, const XML_Char *entityName, int isParamEntity, const XML_Char *value, int valueLength,
const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName)
{
if (value)
@ -880,7 +927,7 @@ void ParserEngine::handleInternalParsedEntityDecl(void* userData, const XML_Char
void ParserEngine::handleSkippedEntity(void* userData, const XML_Char* entityName, int isParameterEntity)
{
ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData);
if (pThis->_pContentHandler)
pThis->_pContentHandler->skippedEntity(entityName);
}

View File

@ -17,6 +17,7 @@
#include "Poco/SAX/EntityResolverImpl.h"
#include "Poco/SAX/InputSource.h"
#include "Poco/XML/NamespaceStrategy.h"
#include "Poco/NumberParser.h"
#include <sstream>
@ -25,6 +26,8 @@ namespace XML {
const XMLString SAXParser::FEATURE_PARTIAL_READS = toXMLString("http://www.appinf.com/features/enable-partial-reads");
const XMLString SAXParser::PROPERTY_BLA_MAXIMUM_AMPLIFICATION = toXMLString("http://www.appinf.com/properties/bla-maximum-amplification");
const XMLString SAXParser::PROPERTY_BLA_ACTIVATION_THRESHOLD = toXMLString("http://www.appinf.com/properties/bla-activation-threshold");
SAXParser::SAXParser():
@ -52,7 +55,7 @@ void SAXParser::setEncoding(const XMLString& encoding)
_engine.setEncoding(encoding);
}
const XMLString& SAXParser::getEncoding() const
{
return _engine.getEncoding();
@ -153,6 +156,10 @@ void SAXParser::setProperty(const XMLString& propertyId, const XMLString& value)
{
	// String-valued property setter.
	//  - The declaration and lexical handler properties do not take a string
	//    value: SAXNotSupportedException.
	//  - The two Billion Laughs attack protection properties are parsed from
	//    the string and handed to the parser engine.
	//  - Any other property id: SAXNotRecognizedException.
	//
	// Poco::NumberParser operates on std::string, so value is converted with
	// fromXMLString() first; this keeps the code building when XMLString is
	// configured as a wide string type.
	if (propertyId == XMLReader::PROPERTY_DECLARATION_HANDLER || propertyId == XMLReader::PROPERTY_LEXICAL_HANDLER)
		throw SAXNotSupportedException(std::string("property does not take a string value: ") + fromXMLString(propertyId));
	else if (propertyId == PROPERTY_BLA_MAXIMUM_AMPLIFICATION)
		_engine.setBillionLaughsAttackProtectionMaximumAmplification(static_cast<float>(Poco::NumberParser::parseFloat(fromXMLString(value))));
	else if (propertyId == PROPERTY_BLA_ACTIVATION_THRESHOLD)
		_engine.setBillionLaughsAttackProtectionActivationThreshold(Poco::NumberParser::parseUnsigned64(fromXMLString(value)));
	else
		throw SAXNotRecognizedException(fromXMLString(propertyId));
}