Update SchemaParser to fully support schema caching

This commit is contained in:
Tristan Penman 2016-02-14 11:01:37 -08:00
parent bc14ef8064
commit bee57e1f90
2 changed files with 288 additions and 76 deletions

View File

@ -14,6 +14,7 @@
#include <valijson/constraints/concrete_constraints.hpp> #include <valijson/constraints/concrete_constraints.hpp>
#include <valijson/internal/json_pointer.hpp> #include <valijson/internal/json_pointer.hpp>
#include <valijson/internal/json_reference.hpp> #include <valijson/internal/json_reference.hpp>
#include <valijson/internal/uri.hpp>
#include <valijson/schema.hpp> #include <valijson/schema.hpp>
#ifdef __clang__ #ifdef __clang__
@ -97,7 +98,7 @@ public:
typename DocumentCache<AdapterType>::Type docCache; typename DocumentCache<AdapterType>::Type docCache;
SchemaCache schemaCache; SchemaCache schemaCache;
try { try {
populateSchema(schema, node, node, schema, boost::none, "", resolveThenPopulateSchema(schema, node, node, schema, boost::none, "",
fetchDoc, NULL, NULL, docCache, schemaCache); fetchDoc, NULL, NULL, docCache, schemaCache);
} catch (...) { } catch (...) {
freeDocumentCache<AdapterType>(docCache, freeDoc); freeDocumentCache<AdapterType>(docCache, freeDoc);
@ -118,7 +119,7 @@ private:
typedef std::map<std::string, const AdapterType*> Type; typedef std::map<std::string, const AdapterType*> Type;
}; };
typedef std::map<std::string, boost::shared_ptr<Schema> > SchemaCache; typedef std::map<std::string, const Subschema *> SchemaCache;
/** /**
* @brief Free memory used by fetched documents * @brief Free memory used by fetched documents
@ -140,6 +141,52 @@ private:
} }
} }
/**
* @brief Find the absolute URI for a document, within a resolution scope
*
* This function captures five different cases that can occur when
* attempting to resolve a document URI within a particular resolution
* scope:
*
* - resolution scope not present, but absolute document URI is
* => document URI as-is
* - resolution scope not present, and document URI is relative or absent
* => no result
* - resolution scope is present, and document URI is a relative path
* => resolve document URI relative to resolution scope
* - resolution scope is present, and document URI is absolute
* => document URI as-is
* - resolution scope is present, but document URI is not
* => resolution scope as-is
*
* This function assumes that the resolution scope is absolute.
*
* When resolving a document URI relative to the resolution scope, the
* document URI should be used to replace the path, query and fragment
* portions of URI provided by the resolution scope.
*/
static boost::optional<std::string> findAbsoluteDocumentUri(
const boost::optional<std::string> resolutionScope,
const boost::optional<std::string> documentUri)
{
if (resolutionScope) {
if (documentUri) {
if (internal::uri::isUriAbsolute(*documentUri)) {
return *documentUri;
} else {
return internal::uri::resolveRelativeUri(
*resolutionScope, *documentUri);
}
} else {
return *resolutionScope;
}
} else if (documentUri && internal::uri::isUriAbsolute(*documentUri)) {
return *documentUri;
} else {
return boost::none;
}
}
/** /**
* @brief Extract a JSON Reference string from a node * @brief Extract a JSON Reference string from a node
* *
@ -189,6 +236,55 @@ private:
return ""; return "";
} }
/**
* @brief Search the schema cache for a schema matching a given key
*
* If the key is not present in the query cache, a NULL pointer will be
* returned, and the contents of the cache will remain unchanged. This is
* in contrast to the behaviour of the std::map [] operator, which would
* add the NULL pointer to the cache.
*
* @param schemaCache schema cache to query
* @param queryKey key to search for
*
* @return shared pointer to Schema if found, NULL pointer otherwise
*/
static const Subschema * querySchemaCache(SchemaCache &schemaCache,
const std::string &queryKey)
{
const SchemaCache::iterator itr = schemaCache.find(queryKey);
if (itr == schemaCache.end()) {
return NULL;
}
return itr->second;
}
/**
* @brief Add entries to the schema cache for a given list of keys
*
* @param schemaCache schema cache to update
* @param keysToCreate list of keys to create entries for
* @param schema shared pointer to schema that keys will map to
*
* @throws std::logic_error if any of the keys are already present in the
* schema cache. This behaviour is intended to help detect incorrect
* usage of the schema cache during development, and is not expected
* to occur otherwise, even for malformed schemas.
*/
void updateSchemaCache(SchemaCache &schemaCache,
const std::vector<std::string> &keysToCreate,
const Subschema *schema)
{
BOOST_FOREACH( const std::string &keyToCreate, keysToCreate ) {
const SchemaCache::value_type value(keyToCreate, schema);
if (!schemaCache.insert(value).second) {
throw std::logic_error(
"Key '" + keyToCreate + "' already in schema cache.");
}
}
}
/** /**
* @brief Recursive helper function for retrieving or creating schemas * @brief Recursive helper function for retrieving or creating schemas
* *
@ -234,12 +330,133 @@ private:
SchemaCache &schemaCache, SchemaCache &schemaCache,
std::vector<std::string> &newCacheKeys) std::vector<std::string> &newCacheKeys)
{ {
const Subschema *subschema = rootSchema.createSubschema(); std::string jsonRef;
populateSchema<AdapterType>(rootSchema, rootNode, node, *subschema,
currentScope, nodePath, fetchDoc, parentSubschema, ownName,
docCache, schemaCache);
return subschema; // Check for the first termination condition (found a non-$ref node)
if (!extractJsonReference(node, jsonRef)) {
// Construct a key that we can use to search the schema cache for
// a schema corresponding to the current node
const std::string schemaCacheKey =
currentScope ? (*currentScope + nodePath) : nodePath;
// Retrieve an existing schema from the cache if possible
const Subschema *cachedPtr =
querySchemaCache(schemaCache, schemaCacheKey);
// Create a new schema otherwise
const Subschema *subschema = cachedPtr ? cachedPtr :
rootSchema.createSubschema();
// Add cache entries for keys belonging to any $ref nodes that were
// visited before arriving at the current node
updateSchemaCache(schemaCache, newCacheKeys, subschema);
// Schema cache did not contain a pre-existing schema corresponding
// to the current node, so the schema that was returned will need
// to be populated
if (!cachedPtr) {
populateSchema(rootSchema, rootNode, node, *subschema,
currentScope, nodePath, fetchDoc, parentSubschema,
ownName, docCache, schemaCache);
}
return subschema;
}
// Returns a document URI if the reference points somewhere
// other than the current document
const boost::optional<std::string> documentUri =
internal::json_reference::getJsonReferenceUri(jsonRef);
// Extract JSON Pointer from JSON Reference, with any trailing
// slashes removed so that keys in the schema cache end
// consistently
const std::string actualJsonPointer = sanitiseJsonPointer(
internal::json_reference::getJsonReferencePointer(jsonRef));
// Determine the actual document URI based on the resolution
// scope. An absolute document URI will take precedence when
// present, otherwise we need to resolve the URI relative to
// the current resolution scope
const boost::optional<std::string> actualDocumentUri =
findAbsoluteDocumentUri(currentScope, documentUri);
// Construct a key to search the schema cache for an existing schema
const std::string queryKey = actualDocumentUri ?
(*actualDocumentUri + actualJsonPointer) : actualJsonPointer;
// Check for the second termination condition (found a $ref node that
// already has an entry in the schema cache)
const Subschema *cachedPtr = querySchemaCache(schemaCache, queryKey);
if (cachedPtr) {
updateSchemaCache(schemaCache, newCacheKeys, cachedPtr);
return cachedPtr;
}
if (actualDocumentUri) {
const AdapterType *newDoc = NULL;
// Have we seen this document before?
typename DocumentCache<AdapterType>::Type::iterator docCacheItr =
docCache.find(*actualDocumentUri);
if (docCacheItr == docCache.end()) {
// Resolve reference against remote document
if (!fetchDoc) {
throw std::runtime_error(
"Fetching of remote JSON References not enabled.");
}
// Returns a pointer to the remote document that was
// retrieved, or null if retrieval failed. This class
// will take ownership of the pointer, and call freeDoc
// when it is no longer needed.
newDoc = fetchDoc(*actualDocumentUri);
// Can't proceed without the remote document
if (!newDoc) {
throw std::runtime_error(
"Failed to fetch referenced schema document: " +
*actualDocumentUri);
}
typedef typename DocumentCache<AdapterType>::Type::value_type
DocCacheValueType;
docCache.insert(DocCacheValueType(*actualDocumentUri, newDoc));
} else {
newDoc = docCacheItr->second;
}
// Find where we need to be in the document
const AdapterType &referencedAdapter =
internal::json_pointer::resolveJsonPointer(*newDoc,
actualJsonPointer);
newCacheKeys.push_back(queryKey);
// Populate the schema, starting from the referenced node, with
// nested JSON References resolved relative to the new root node
return makeOrReuseSchema(rootSchema, *newDoc, referencedAdapter,
currentScope, actualJsonPointer, fetchDoc, parentSubschema,
ownName, docCache, schemaCache, newCacheKeys);
}
// JSON References in nested schema will be resolved relative to the
// current document
const AdapterType &referencedAdapter =
internal::json_pointer::resolveJsonPointer(
rootNode, actualJsonPointer);
newCacheKeys.push_back(queryKey);
// Populate the schema, starting from the referenced node, with
// nested JSON References resolved relative to the new root node
return makeOrReuseSchema(rootSchema, rootNode, referencedAdapter,
currentScope, actualJsonPointer, fetchDoc, parentSubschema,
ownName, docCache, schemaCache, newCacheKeys);
} }
/** /**
@ -329,14 +546,6 @@ private:
"SchemaParser::populateSchema must be invoked with an " "SchemaParser::populateSchema must be invoked with an "
"appropriate Adapter implementation"); "appropriate Adapter implementation");
std::string jsonRef;
if (extractJsonReference(node, jsonRef)) {
populateSchemaUsingJsonReference(rootSchema, jsonRef, rootNode,
node, subschema, currentScope, nodePath, fetchDoc,
parentSubschema, ownName, docCache, schemaCache);
return;
}
const typename AdapterType::Object object = node.asObject(); const typename AdapterType::Object object = node.asObject();
typename AdapterType::Object::const_iterator itr(object.end()); typename AdapterType::Object::const_iterator itr(object.end());
@ -614,98 +823,101 @@ private:
} }
/** /**
* @brief Populate a schema using a JSON Reference * @brief Resolves a chain of JSON References before populating a schema
* *
* Allows JSON references to be used with minimal changes to the parser * This helper function is used directly by the publicly visible
* helper functions. * populateSchema function. It ensures that the node being parsed is a
* concrete node, and not a JSON Reference. This function will call itself
* recursively to resolve references until a concrete node is found.
* *
* @param rootSchema The Schema instance, and root subschema, through * @param rootSchema The Schema instance, and root subschema, through
* which other subschemas can be created and * which other subschemas can be created and modified
* modified * @param rootNode Reference to the node from which JSON References
* @param jsonRef String containing JSON Reference value * will be resolved when they refer to the current
* @param rootNode Reference to the node from which JSON References * document
* will be resolved when they refer to the current * @param node Reference to node to parse
* document; used for recursive parsing of schemas * @param subschema Reference to Schema to populate
* @param node Reference to node to parse * @param currentScope URI for current resolution scope
* @param schema Reference to Schema to populate * @param nodePath JSON Pointer representing path to current node
* @param currentScope URI for current resolution scope * @param fetchDoc Function to fetch remote JSON documents (optional)
* @param nodePath JSON Pointer representing path to current node * @param parentSchema Optional pointer to the parent schema, used to
* @param fetchDoc Optional function to fetch remote JSON documents * support required keyword in Draft 3
* @param parentSubschema Optional pointer to the parent schema, used to * @param ownName Optional pointer to a node name, used to support
* support required keyword in Draft 3 * the 'required' keyword in Draft 3
* @param ownName Optional pointer to a node name, used to support * @param docCache Cache of resolved and fetched remote documents
* the 'required' keyword in Draft 3 * @param schemaCache Cache of populated schemas
*/ */
template<typename AdapterType> template<typename AdapterType>
void populateSchemaUsingJsonReference( void resolveThenPopulateSchema(
Schema &rootSchema, Schema &rootSchema,
const std::string &jsonRef,
const AdapterType &rootNode, const AdapterType &rootNode,
const AdapterType &, const AdapterType &node,
const Subschema &subschema, const Subschema &subschema,
const boost::optional<std::string> currentScope, const boost::optional<std::string> currentScope,
const std::string &nodePath, const std::string &nodePath,
const typename FunctionPtrs<AdapterType>::FetchDoc fetchDoc, const typename FunctionPtrs<AdapterType>::FetchDoc fetchDoc,
const Subschema *parentSubschema, const Subschema *parentSchema,
const std::string *ownName, const std::string *ownName,
typename DocumentCache<AdapterType>::Type &docCache, typename DocumentCache<AdapterType>::Type &docCache,
SchemaCache &schemaCache) SchemaCache &schemaCache)
{ {
std::string jsonRef;
if (!extractJsonReference(node, jsonRef)) {
populateSchema(rootSchema, rootNode, node, subschema, currentScope,
nodePath, fetchDoc, parentSchema, ownName, docCache,
schemaCache);
return;
}
// Returns a document URI if the reference points somewhere // Returns a document URI if the reference points somewhere
// other than the current document // other than the current document
const boost::optional<std::string> documentUri = const boost::optional<std::string> documentUri =
internal::json_reference::getJsonReferenceUri(jsonRef); internal::json_reference::getJsonReferenceUri(jsonRef);
// Extract JSON Pointer from JSON Reference // Extract JSON Pointer from JSON Reference
const std::string jsonPointer = sanitiseJsonPointer( const std::string actualJsonPointer = sanitiseJsonPointer(
internal::json_reference::getJsonReferencePointer(jsonRef)); internal::json_reference::getJsonReferencePointer(jsonRef));
if (documentUri) { if (documentUri && internal::uri::isUriAbsolute(*documentUri)) {
// Resolve reference against remote document // Resolve reference against remote document
if (!fetchDoc) { if (!fetchDoc) {
throw std::runtime_error( throw std::runtime_error(
"Support for JSON References not enabled."); "Fetching of remote JSON References not enabled.");
} }
const AdapterType * docPtr = NULL; const AdapterType *newDoc = fetchDoc(*documentUri);
const typename DocumentCache<AdapterType>::Type::const_iterator
docCacheItr = docCache.find(*documentUri);
if (docCacheItr == docCache.end()) {
// Returns a shared pointer to the remote document that was
// retrieved, or null if retrieval failed. The resulting
// document must remain in scope until populateSchema returns.
docPtr = (*fetchDoc)(*documentUri);
// Can't proceed without the remote document // Can't proceed without the remote document
if (!docPtr) { if (!newDoc) {
throw std::runtime_error( throw std::runtime_error(
"Failed to fetch referenced schema document."); "Failed to fetch referenced schema document: " +
} *documentUri);
// TODO: If this fails, how would the document be freed?
docCache.insert(
typename DocumentCache<AdapterType>::Type::value_type(
*documentUri, docPtr));
} else {
docPtr = docCacheItr->second;
} }
const AdapterType &ref = internal::json_pointer::resolveJsonPointer( // Add to document cache
*docPtr, jsonPointer); typedef typename DocumentCache<AdapterType>::Type::value_type
DocCacheValueType;
// Resolve reference against retrieved document docCache.insert(DocCacheValueType(*documentUri, newDoc));
populateSchema<AdapterType>(rootSchema, ref, ref, subschema,
currentScope, nodePath, fetchDoc, parentSubschema, ownName, const AdapterType &referencedAdapter =
docCache, schemaCache); internal::json_pointer::resolveJsonPointer(
*newDoc, actualJsonPointer);
// TODO: Need to detect degenerate circular references
resolveThenPopulateSchema(rootSchema, *newDoc, referencedAdapter,
schema, boost::none, actualJsonPointer, fetchDoc,
parentSchema, ownName, docCache, schemaCache);
} else { } else {
const AdapterType &ref = internal::json_pointer::resolveJsonPointer( const AdapterType &referencedAdapter =
rootNode, jsonPointer); internal::json_pointer::resolveJsonPointer(
rootNode, actualJsonPointer);
// Resolve reference against current document // TODO: Need to detect degenerate circular references
populateSchema<AdapterType>(rootSchema, rootNode, ref, subschema, resolveThenPopulateSchema(rootSchema, rootNode, referencedAdapter,
currentScope, nodePath, fetchDoc, parentSubschema, ownName, schema, boost::none, actualJsonPointer, fetchDoc,
docCache, schemaCache); parentSchema, ownName, docCache, schemaCache);
} }
} }

View File

@ -28,7 +28,7 @@ class TestFetchDocumentCallback : public ::testing::Test
const RapidJsonAdapter * fetchDocument(const std::string &uri) const RapidJsonAdapter * fetchDocument(const std::string &uri)
{ {
EXPECT_STREQ("test", uri.c_str()); EXPECT_STREQ("http://localhost:1234/", uri.c_str());
rapidjson::Value valueOfTypeAttribute; rapidjson::Value valueOfTypeAttribute;
valueOfTypeAttribute.SetString("string", allocator); valueOfTypeAttribute.SetString("string", allocator);
@ -60,7 +60,7 @@ TEST_F(TestFetchDocumentCallback, Basics)
rapidjson::Document schemaDocument; rapidjson::Document schemaDocument;
RapidJsonAdapter schemaDocumentAdapter(schemaDocument); RapidJsonAdapter schemaDocumentAdapter(schemaDocument);
schemaDocument.SetObject(); schemaDocument.SetObject();
schemaDocument.AddMember("$ref", "test#/", allocator); schemaDocument.AddMember("$ref", "http://localhost:1234/#/", allocator);
// Parse schema document // Parse schema document
Schema schema; Schema schema;