From 0eb6cb8e5f2ce8bce04768c801e569765378bb97 Mon Sep 17 00:00:00 2001 From: miloyip Date: Sun, 3 May 2015 14:14:05 +0800 Subject: [PATCH] Add equality/inequality operator, URI fragment stringify and UTF-8 Percent Encoding/Decoding --- include/rapidjson/pointer.h | 162 ++++++++++++++++++++++++++++------ test/unittest/pointertest.cpp | 77 ++++++++++++++-- 2 files changed, 207 insertions(+), 32 deletions(-) diff --git a/include/rapidjson/pointer.h b/include/rapidjson/pointer.h index 4086751e..2d424123 100644 --- a/include/rapidjson/pointer.h +++ b/include/rapidjson/pointer.h @@ -123,7 +123,7 @@ public: for (Token *t = rhs.tokens_; t != rhs.tokens_ + tokenCount_; ++t) nameBufferSize += t->length; nameBuffer_ = (Ch*)allocator_->Malloc(nameBufferSize * sizeof(Ch)); - std::memcpy(nameBuffer_, rhs.nameBuffer_, nameBufferSize); + std::memcpy(nameBuffer_, rhs.nameBuffer_, nameBufferSize * sizeof(Ch)); tokens_ = (Token*)allocator_->Malloc(tokenCount_ * sizeof(Token)); std::memcpy(tokens_, rhs.tokens_, tokenCount_ * sizeof(Token)); @@ -149,20 +149,34 @@ public: size_t GetTokenCount() const { return tokenCount_; } - template - void Stringify(OutputStream& os) const { - RAPIDJSON_ASSERT(IsValid()); - for (Token *t = tokens_; t != tokens_ + tokenCount_; ++t) { - os.Put('/'); - for (size_t j = 0; j < t->length; j++) { - Ch c = t->name[j]; - if (c == '~') { os.Put('~'); os.Put('0'); } - else if (c == '/') { os.Put('~'); os.Put('1'); } - else os.Put(c); + bool operator==(const GenericPointer& rhs) const { + if (!IsValid() || !rhs.IsValid() || tokenCount_ != rhs.tokenCount_) + return false; + + for (size_t i = 0; i < tokenCount_; i++) { + if (tokens_[i].index != rhs.tokens_[i].index || + tokens_[i].length != rhs.tokens_[i].length || + std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch) * tokens_[i].length) != 0) + { + return false; } } + + return true; } + bool operator!=(const GenericPointer& rhs) const { return !(*this == rhs); } + + template + bool Stringify(OutputStream& os) const { + return Stringify(os); + } + + template + bool StringifyUriFragment(OutputStream& os) const { + return Stringify(os); + } + ValueType& Create(ValueType& root, typename ValueType::AllocatorType& allocator, bool* alreadyExist = 0) const { RAPIDJSON_ASSERT(IsValid()); ValueType* v = &root; @@ -365,6 +379,11 @@ public: } private: + bool NeedPercentEncode(Ch c) const { + // RFC 3986 2.3 Unreserved Characters + return !((c >= '0' && c <= '9') || (c >= 'A' && c <='Z') || (c >= 'a' && c <= 'z') || c == '-' || c == '.' || c == '_' || c =='~'); + } + //! Parse a JSON String or its URI fragment representation into tokens. /*! \param source Either a JSON Pointer string, or its URI fragment representation. Not need to be null terminated. @@ -409,32 +428,37 @@ private: bool isNumber = true; while (i < length && source[i] != '/') { - Ch c = source[i++]; + Ch c = source[i]; if (uriFragment) { // Decoding percent-encoding for URI fragment if (c == '%') { - c = 0; - for (int j = 0; j < 2; j++) { - c <<= 4; - Ch h = source[i]; - if (h >= '0' && h <= '9') c += h - '0'; - else if (h >= 'A' && h <= 'F') c += h - 'A' + 10; - else if (h >= 'a' && h <= 'f') c += h - 'a' + 10; - else { - parseErrorCode_ = kPointerParseErrorInvalidPercentEncoding; - goto error; - } + PercentDecodeStream is(&source[i]); + GenericInsituStringStream os(name); + Ch* begin = os.PutBegin(); + Transcoder, EncodingType> transcoder; + if (!transcoder.Transcode(is, os) || !is.IsValid()) { + parseErrorCode_ = kPointerParseErrorInvalidPercentEncoding; + goto error; + } + size_t len = os.PutEnd(begin); + i += is.Tell() - 1; + if (len == 1) + c = *name; + else { + name += len; + isNumber = false; i++; + continue; } } - else if (!((c >= '0' && c <= '9') || (c >= 'A' && c <='Z') || (c >= 'a' && c <= 'z') || c == '-' || c == '.' || c == '_' || c =='~')) { - // RFC 3986 2.3 Unreserved Characters - i--; + else if (NeedPercentEncode(c)) { parseErrorCode_ = kPointerParseErrorCharacterMustPercentEncode; goto error; } } + + i++; // Escaping "~0" -> '~', "~1" -> '/' if (c == '~') { @@ -498,6 +522,92 @@ private: return; } + template + bool Stringify(OutputStream& os) const { + RAPIDJSON_ASSERT(IsValid()); + + if (uriFragment) + os.Put('#'); + + for (Token *t = tokens_; t != tokens_ + tokenCount_; ++t) { + os.Put('/'); + for (size_t j = 0; j < t->length; j++) { + Ch c = t->name[j]; + if (c == '~') { + os.Put('~'); + os.Put('0'); + } + else if (c == '/') { + os.Put('~'); + os.Put('1'); + } + else if (uriFragment && NeedPercentEncode(c)) { + // Transcode to UTF8 sequence + GenericStringStream source(&t->name[j]); + PercentEncodeStream target(os); + Transcoder > transcoder; + if (!transcoder.Transcode(source, target)) + return false; + j += source.Tell() - 1; + } + else + os.Put(c); + } + } + return true; + } + + class PercentDecodeStream { + public: + PercentDecodeStream(const Ch* source) : src_(source), head_(source), valid_(true) {} + + Ch Take() { + if (*src_ != '%') { + valid_ = false; + return 0; + } + src_++; + Ch c = 0; + for (int j = 0; j < 2; j++) { + c <<= 4; + Ch h = *src_; + if (h >= '0' && h <= '9') c += h - '0'; + else if (h >= 'A' && h <= 'F') c += h - 'A' + 10; + else if (h >= 'a' && h <= 'f') c += h - 'a' + 10; + else { + valid_ = false; + return 0; + } + src_++; + } + return c; + } + + size_t Tell() const { return src_ - head_; } + + bool IsValid() const { return valid_; } + + private: + const Ch* src_; //!< Current read position. + const Ch* head_; //!< Original head of the string. + bool valid_; + }; + + template + class PercentEncodeStream { + public: + PercentEncodeStream(OutputStream& os) : os_(os) {} + void Put(char c) { // UTF-8 must be byte + unsigned char u = static_cast(c); + static const char hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + os_.Put('%'); + os_.Put(hexDigits[u >> 4]); + os_.Put(hexDigits[u & 15]); + } + private: + OutputStream& os_; + }; + Allocator* allocator_; Allocator* ownAllocator_; Ch* nameBuffer_; diff --git a/test/unittest/pointertest.cpp b/test/unittest/pointertest.cpp index 32f659b9..3a6742e9 100644 --- a/test/unittest/pointertest.cpp +++ b/test/unittest/pointertest.cpp @@ -277,6 +277,46 @@ TEST(Pointer, Parse_URIFragment) { EXPECT_EQ(kPointerInvalidIndex, p.GetTokens()[0].index); } + { + // Decode UTF-8 perecent encoding to UTF-8 + Pointer p("#/%C2%A2"); + EXPECT_TRUE(p.IsValid()); + EXPECT_EQ(1u, p.GetTokenCount()); + EXPECT_STREQ("\xC2\xA2", p.GetTokens()[0].name); + } + + { + // Decode UTF-8 perecent encoding to UTF-16 + GenericPointer > > p(L"#/%C2%A2"); + EXPECT_TRUE(p.IsValid()); + EXPECT_EQ(1u, p.GetTokenCount()); + EXPECT_STREQ(L"\xA2", p.GetTokens()[0].name); + } + + { + // Decode UTF-8 perecent encoding to UTF-16 + GenericPointer > > p(L"#/%C2%A2"); + EXPECT_TRUE(p.IsValid()); + EXPECT_EQ(1u, p.GetTokenCount()); + EXPECT_STREQ(L"\xA2", p.GetTokens()[0].name); + } + + { + // Decode UTF-8 perecent encoding to UTF-16 + GenericPointer > > p(L"#/%C2%A2"); + EXPECT_TRUE(p.IsValid()); + EXPECT_EQ(1u, p.GetTokenCount()); + EXPECT_STREQ(L"\x00A2", p.GetTokens()[0].name); + } + + { + // Decode UTF-8 perecent encoding to UTF-16 + GenericPointer > > p(L"#/%E2%82%AC"); + EXPECT_TRUE(p.IsValid()); + EXPECT_EQ(1u, p.GetTokenCount()); + EXPECT_STREQ(L"\x20AC", p.GetTokens()[0].name); + } + { // kPointerParseErrorTokenMustBeginWithSolidus Pointer p("# "); @@ -306,7 +346,7 @@ TEST(Pointer, Parse_URIFragment) { Pointer p("#/%"); EXPECT_FALSE(p.IsValid()); EXPECT_EQ(kPointerParseErrorInvalidPercentEncoding, p.GetParseErrorCode()); - EXPECT_EQ(3u, p.GetParseErrorOffset()); + EXPECT_EQ(2u, p.GetParseErrorOffset()); } { @@ -314,7 +354,7 @@ TEST(Pointer, Parse_URIFragment) { Pointer p("#/%g0"); EXPECT_FALSE(p.IsValid()); EXPECT_EQ(kPointerParseErrorInvalidPercentEncoding, p.GetParseErrorCode()); - EXPECT_EQ(3u, p.GetParseErrorOffset()); + EXPECT_EQ(2u, p.GetParseErrorOffset()); } { @@ -322,7 +362,7 @@ TEST(Pointer, Parse_URIFragment) { Pointer p("#/%0g"); EXPECT_FALSE(p.IsValid()); EXPECT_EQ(kPointerParseErrorInvalidPercentEncoding, p.GetParseErrorCode()); - EXPECT_EQ(4u, p.GetParseErrorOffset()); + EXPECT_EQ(2u, p.GetParseErrorOffset()); } { @@ -335,12 +375,11 @@ TEST(Pointer, Parse_URIFragment) { { // kPointerParseErrorCharacterMustPercentEncode - Pointer p("#/\\"); + Pointer p("#/\n"); EXPECT_FALSE(p.IsValid()); EXPECT_EQ(kPointerParseErrorCharacterMustPercentEncode, p.GetParseErrorCode()); EXPECT_EQ(2u, p.GetParseErrorOffset()); } - } TEST(Pointer, Stringify) { @@ -357,7 +396,10 @@ TEST(Pointer, Stringify) { "/i\\j", "/k\"l", "/ ", - "/m~0n" + "/m~0n", + "/\xC2\xA2", + "/\xE2\x82\xAC", + "/\xF0\x9D\x84\x9E" }; for (size_t i = 0; i < sizeof(sources) / sizeof(sources[0]); i++) { @@ -365,6 +407,13 @@ TEST(Pointer, Stringify) { StringBuffer s; p.Stringify(s); EXPECT_STREQ(sources[i], s.GetString()); + + // Stringify to URI fragment + StringBuffer s2; + p.StringifyUriFragment(s2); + Pointer p2(s2.GetString(), s2.GetSize()); + EXPECT_TRUE(p2.IsValid()); + EXPECT_TRUE(p == p2); } } @@ -444,6 +493,22 @@ TEST(Pointer, Assignment) { } } +TEST(Pointer, Equality) { + EXPECT_TRUE(Pointer("/foo/0") == Pointer("/foo/0")); + EXPECT_FALSE(Pointer("/foo/0") == Pointer("/foo/1")); + EXPECT_FALSE(Pointer("/foo/0") == Pointer("/foo/0/1")); + EXPECT_FALSE(Pointer("/foo/0") == Pointer("a")); + EXPECT_FALSE(Pointer("a") == Pointer("a")); // Invalid always not equal +} + +TEST(Pointer, Inequality) { + EXPECT_FALSE(Pointer("/foo/0") != Pointer("/foo/0")); + EXPECT_TRUE(Pointer("/foo/0") != Pointer("/foo/1")); + EXPECT_TRUE(Pointer("/foo/0") != Pointer("/foo/0/1")); + EXPECT_TRUE(Pointer("/foo/0") != Pointer("a")); + EXPECT_TRUE(Pointer("a") != Pointer("a")); // Invalid always not equal +} + TEST(Pointer, Create) { Document d; {