Number fixes (#1053)

* cleaning up the logic for parsing numbers * Add Testcases for new Reader in jsontestrunner
2026-01-11 16:43:32 +01:00 · 2019-11-08 19:49:16 -08:00
parent ff58fdcc75
commit 645cd0412c
115 changed files with 123 additions and 75 deletions
--- a/meson.build
+++ b/meson.build
@@ -15,7 +15,7 @@ project(
    'cpp_std=c++11',
    'warning_level=1'],
  license : 'Public Domain',
-  meson_version : '>= 0.50.0')
+  meson_version : '>= 0.49.0')


 jsoncpp_headers = [
--- a/src/jsontestrunner/main.cpp
+++ b/src/jsontestrunner/main.cpp
@@ -15,7 +15,9 @@

 #include <algorithm> // sort
 #include <cstdio>
+#include <iostream>
 #include <json/json.h>
+#include <memory>
 #include <sstream>

 struct Options {
@@ -126,19 +128,45 @@ static int parseAndSaveValueTree(const Json::String& input,
                                 const Json::String& actual,
                                 const Json::String& kind,
                                 const Json::Features& features, bool parseOnly,
-                                 Json::Value* root) {
-  Json::Reader reader(features);
-  bool parsingSuccessful =
-      reader.parse(input.data(), input.data() + input.size(), *root);
-  if (!parsingSuccessful) {
-    printf("Failed to parse %s file: \n%s\n", kind.c_str(),
-           reader.getFormattedErrorMessages().c_str());
-    return 1;
+                                 Json::Value* root, bool use_legacy) {
+  if (!use_legacy) {
+    Json::CharReaderBuilder builder;
+
+    builder.settings_["allowComments"] = features.allowComments_;
+    builder.settings_["strictRoot"] = features.strictRoot_;
+    builder.settings_["allowDroppedNullPlaceholders"] =
+        features.allowDroppedNullPlaceholders_;
+    builder.settings_["allowNumericKeys"] = features.allowNumericKeys_;
+
+    std::unique_ptr<Json::CharReader> reader(builder.newCharReader());
+    Json::String errors;
+    const bool parsingSuccessful =
+        reader->parse(input.data(), input.data() + input.size(), root, &errors);
+
+    if (!parsingSuccessful) {
+      std::cerr << "Failed to parse " << kind << " file: " << std::endl
+                << errors << std::endl;
+      return 1;
+    }
+
+    // We may instead check the legacy implementation (to ensure it doesn't
+    // randomly get broken).
+  } else {
+    Json::Reader reader(features);
+    const bool parsingSuccessful =
+        reader.parse(input.data(), input.data() + input.size(), *root);
+    if (!parsingSuccessful) {
+      std::cerr << "Failed to parse " << kind << " file: " << std::endl
+                << reader.getFormatedErrorMessages() << std::endl;
+      return 1;
+    }
  }
+
  if (!parseOnly) {
    FILE* factual = fopen(actual.c_str(), "wt");
    if (!factual) {
-      printf("Failed to create %s actual file.\n", kind.c_str());
+      std::cerr << "Failed to create '" << kind << "' actual file."
+                << std::endl;
      return 2;
    }
    printValueTree(factual, *root);
@@ -172,7 +200,7 @@ static int rewriteValueTree(const Json::String& rewritePath,
  *rewrite = write(root);
  FILE* fout = fopen(rewritePath.c_str(), "wt");
  if (!fout) {
-    printf("Failed to create rewrite file: %s\n", rewritePath.c_str());
+    std::cerr << "Failed to create rewrite file: " << rewritePath << std::endl;
    return 2;
  }
  fprintf(fout, "%s\n", rewrite->c_str());
@@ -193,14 +221,15 @@ static Json::String removeSuffix(const Json::String& path,
 static void printConfig() {
 // Print the configuration used to compile JsonCpp
 #if defined(JSON_NO_INT64)
-  printf("JSON_NO_INT64=1\n");
+  std::cout << "JSON_NO_INT64=1" << std::endl;
 #else
-  printf("JSON_NO_INT64=0\n");
+  std::cout << "JSON_NO_INT64=0" << std::endl;
 #endif
 }

 static int printUsage(const char* argv[]) {
-  printf("Usage: %s [--strict] input-json-file", argv[0]);
+  std::cout << "Usage: " << argv[0] << " [--strict] input-json-file"
+            << std::endl;
  return 3;
 }

@@ -230,7 +259,7 @@ static int parseCommandLine(int argc, const char* argv[], Options* opts) {
    } else if (writerName == "BuiltStyledStreamWriter") {
      opts->write = &useBuiltStyledStreamWriter;
    } else {
-      printf("Unknown '--json-writer %s'\n", writerName.c_str());
+      std::cerr << "Unknown '--json-writer' " << writerName << std::endl;
      return 4;
    }
  }
@@ -240,19 +269,20 @@ static int parseCommandLine(int argc, const char* argv[], Options* opts) {
  opts->path = argv[index];
  return 0;
 }
-static int runTest(Options const& opts) {
+
+static int runTest(Options const& opts, bool use_legacy) {
  int exitCode = 0;

  Json::String input = readInputTestFile(opts.path.c_str());
  if (input.empty()) {
-    printf("Failed to read input or empty input: %s\n", opts.path.c_str());
+    std::cerr << "Invalid input file: " << opts.path << std::endl;
    return 3;
  }

  Json::String basePath = removeSuffix(opts.path, ".json");
  if (!opts.parseOnly && basePath.empty()) {
-    printf("Bad input path. Path does not end with '.expected':\n%s\n",
-           opts.path.c_str());
+    std::cerr << "Bad input path '" << opts.path
+              << "'. Must end with '.expected'" << std::endl;
    return 3;
  }

@@ -262,34 +292,47 @@ static int runTest(Options const& opts) {

  Json::Value root;
  exitCode = parseAndSaveValueTree(input, actualPath, "input", opts.features,
-                                   opts.parseOnly, &root);
+                                   opts.parseOnly, &root, use_legacy);
  if (exitCode || opts.parseOnly) {
    return exitCode;
  }
+
  Json::String rewrite;
  exitCode = rewriteValueTree(rewritePath, root, opts.write, &rewrite);
  if (exitCode) {
    return exitCode;
  }
+
  Json::Value rewriteRoot;
  exitCode = parseAndSaveValueTree(rewrite, rewriteActualPath, "rewrite",
-                                   opts.features, opts.parseOnly, &rewriteRoot);
-  if (exitCode) {
-    return exitCode;
-  }
-  return 0;
+                                   opts.features, opts.parseOnly, &rewriteRoot,
+                                   use_legacy);
+
+  return exitCode;
 }
+
 int main(int argc, const char* argv[]) {
  Options opts;
  try {
    int exitCode = parseCommandLine(argc, argv, &opts);
    if (exitCode != 0) {
-      printf("Failed to parse command-line.");
+      std::cerr << "Failed to parse command-line." << std::endl;
      return exitCode;
    }
-    return runTest(opts);
+
+    const int modern_return_code = runTest(opts, false);
+    if (modern_return_code) {
+      return modern_return_code;
+    }
+
+    const std::string filename =
+        opts.path.substr(opts.path.find_last_of("\\/") + 1);
+    const bool should_run_legacy = (filename.rfind("legacy_", 0) == 0);
+    if (should_run_legacy) {
+      return runTest(opts, true);
+    }
  } catch (const std::exception& e) {
-    printf("Unhandled exception:\n%s\n", e.what());
+    std::cerr << "Unhandled exception:" << std::endl << e.what() << std::endl;
    return 1;
  }
 }
--- a/src/lib_json/json_reader.cpp
+++ b/src/lib_json/json_reader.cpp
@@ -1540,19 +1540,45 @@ bool OurReader::decodeNumber(Token& token, Value& decoded) {
  // larger than the maximum supported value of an integer then
  // we decode the number as a double.
  Location current = token.start_;
-  bool isNegative = *current == '-';
-  if (isNegative)
+  const bool isNegative = *current == '-';
+  if (isNegative) {
    ++current;
+  }

-  static constexpr auto positive_threshold = Value::maxLargestUInt / 10;
-  static constexpr auto positive_last_digit = Value::maxLargestUInt % 10;
-  static constexpr auto negative_threshold =
-      Value::LargestUInt(Value::minLargestInt) / 10;
-  static constexpr auto negative_last_digit =
-      Value::LargestUInt(Value::minLargestInt) % 10;
+  // We assume we can represent the largest and smallest integer types as
+  // unsigned integers with separate sign. This is only true if they can fit
+  // into an unsigned integer.
+  static_assert(Value::maxLargestInt <= Value::maxLargestUInt,
+                "Int must be smaller than UInt");

-  const auto threshold = isNegative ? negative_threshold : positive_threshold;
-  const auto last_digit =
+  // We need to convert minLargestInt into a positive number. The easiest way
+  // to do this conversion is to assume our "threshold" value of minLargestInt
+  // divided by 10 can fit in maxLargestInt when absolute valued. This should
+  // be a safe assumption.
+  static_assert(Value::minLargestInt <= -Value::maxLargestInt,
+                "The absolute value of minLargestInt must be greater than or "
+                "equal to maxLargestInt");
+  static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
+                "The absolute value of minLargestInt must be only 1 magnitude "
+                "larger than maxLargest Int");
+
+  static constexpr Value::LargestUInt positive_threshold =
+      Value::maxLargestUInt / 10;
+  static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;
+
+  // For the negative values, we have to be more careful. Since typically
+  // -Value::minLargestInt will cause an overflow, we first divide by 10 and
+  // then take the inverse. This assumes that minLargestInt is only a single
+  // power of 10 different in magnitude, which we check above. For the last
+  // digit, we take the modulus before negating for the same reason.
+  static constexpr Value::LargestUInt negative_threshold =
+      Value::LargestUInt(-(Value::minLargestInt / 10));
+  static constexpr Value::UInt negative_last_digit =
+      Value::UInt(-(Value::minLargestInt % 10));
+
+  const Value::LargestUInt threshold =
+      isNegative ? negative_threshold : positive_threshold;
+  const Value::UInt max_last_digit =
      isNegative ? negative_last_digit : positive_last_digit;

  Value::LargestUInt value = 0;
@@ -1561,26 +1587,30 @@ bool OurReader::decodeNumber(Token& token, Value& decoded) {
    if (c < '0' || c > '9')
      return decodeDouble(token, decoded);

-    const auto digit(static_cast<Value::UInt>(c - '0'));
+    const Value::UInt digit(static_cast<Value::UInt>(c - '0'));
    if (value >= threshold) {
      // We've hit or exceeded the max value divided by 10 (rounded down). If
      // a) we've only just touched the limit, meaing value == threshold,
      // b) this is the last digit, or
      // c) it's small enough to fit in that rounding delta, we're okay.
      // Otherwise treat this number as a double to avoid overflow.
-      if (value > threshold || current != token.end_ || digit > last_digit) {
+      if (value > threshold || current != token.end_ ||
+          digit > max_last_digit) {
        return decodeDouble(token, decoded);
      }
    }
    value = value * 10 + digit;
  }

-  if (isNegative)
-    decoded = -Value::LargestInt(value);
-  else if (value <= Value::LargestUInt(Value::maxLargestInt))
+  if (isNegative) {
+    // We use the same magnitude assumption here, just in case.
+    const Value::UInt last_digit = static_cast<Value::UInt>(value % 10);
+    decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
+  } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
    decoded = Value::LargestInt(value);
-  else
+  } else {
    decoded = value;
+  }

  return true;
 }
@@ -1597,37 +1627,12 @@ bool OurReader::decodeDouble(Token& token) {

 bool OurReader::decodeDouble(Token& token, Value& decoded) {
  double value = 0;
-  const int bufferSize = 32;
-  int count;
-  ptrdiff_t const length = token.end_ - token.start_;
-
-  // Sanity check to avoid buffer overflow exploits.
-  if (length < 0) {
-    return addError("Unable to parse token length", token);
-  }
-  auto const ulength = static_cast<size_t>(length);
-
-  // Avoid using a string constant for the format control string given to
-  // sscanf, as this can cause hard to debug crashes on OS X. See here for more
-  // info:
-  //
-  //     http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
-  char format[] = "%lf";
-
-  if (length <= bufferSize) {
-    Char buffer[bufferSize + 1];
-    memcpy(buffer, token.start_, ulength);
-    buffer[length] = 0;
-    fixNumericLocaleInput(buffer, buffer + length);
-    count = sscanf(buffer, format, &value);
-  } else {
-    String buffer(token.start_, token.end_);
-    count = sscanf(buffer.c_str(), format, &value);
-  }
-
-  if (count != 1)
+  const String buffer(token.start_, token.end_);
+  IStringStream is(buffer);
+  if (!(is >> value)) {
    return addError(
        "'" + String(token.start_, token.end_) + "' is not a number.", token);
+  }
  decoded = value;
  return true;
 }
@@ -1649,9 +1654,9 @@ bool OurReader::decodeString(Token& token, String& decoded) {
  Location end = token.end_ - 1;       // do not include '"'
  while (current != end) {
    Char c = *current++;
-    if (c == '"')
+    if (c == '"') {
      break;
-    else if (c == '\\') {
+    } else if (c == '\\') {
      if (current == end)
        return addError("Empty escape sequence in string", token, current);
      Char escape = *current++;
--- a/test/data/legacy_test_array_01.expected
+++ b/test/data/legacy_test_array_01.expected
--- a/test/data/legacy_test_array_01.json
+++ b/test/data/legacy_test_array_01.json
--- a/test/data/legacy_test_array_02.expected
+++ b/test/data/legacy_test_array_02.expected
--- a/test/data/legacy_test_array_02.json
+++ b/test/data/legacy_test_array_02.json
--- a/test/data/legacy_test_array_03.expected
+++ b/test/data/legacy_test_array_03.expected
--- a/test/data/legacy_test_array_03.json
+++ b/test/data/legacy_test_array_03.json
--- a/test/data/legacy_test_array_04.expected
+++ b/test/data/legacy_test_array_04.expected
--- a/test/data/legacy_test_array_04.json
+++ b/test/data/legacy_test_array_04.json
--- a/test/data/legacy_test_array_05.expected
+++ b/test/data/legacy_test_array_05.expected
--- a/test/data/legacy_test_array_05.json
+++ b/test/data/legacy_test_array_05.json
--- a/test/data/legacy_test_array_06.expected
+++ b/test/data/legacy_test_array_06.expected
--- a/test/data/legacy_test_array_06.json
+++ b/test/data/legacy_test_array_06.json
--- a/test/data/legacy_test_array_07.expected
+++ b/test/data/legacy_test_array_07.expected
--- a/test/data/legacy_test_array_07.json
+++ b/test/data/legacy_test_array_07.json
--- a/test/data/legacy_test_basic_01.expected
+++ b/test/data/legacy_test_basic_01.expected
--- a/test/data/legacy_test_basic_01.json
+++ b/test/data/legacy_test_basic_01.json
--- a/test/data/legacy_test_basic_02.expected
+++ b/test/data/legacy_test_basic_02.expected
--- a/test/data/legacy_test_basic_02.json
+++ b/test/data/legacy_test_basic_02.json
--- a/test/data/legacy_test_basic_03.expected
+++ b/test/data/legacy_test_basic_03.expected
--- a/test/data/legacy_test_basic_03.json
+++ b/test/data/legacy_test_basic_03.json
--- a/test/data/legacy_test_basic_04.expected
+++ b/test/data/legacy_test_basic_04.expected
--- a/test/data/legacy_test_basic_04.json
+++ b/test/data/legacy_test_basic_04.json
--- a/test/data/legacy_test_basic_05.expected
+++ b/test/data/legacy_test_basic_05.expected
--- a/test/data/legacy_test_basic_05.json
+++ b/test/data/legacy_test_basic_05.json
--- a/test/data/legacy_test_basic_06.expected
+++ b/test/data/legacy_test_basic_06.expected
--- a/test/data/legacy_test_basic_06.json
+++ b/test/data/legacy_test_basic_06.json
--- a/test/data/legacy_test_basic_07.expected
+++ b/test/data/legacy_test_basic_07.expected
--- a/test/data/legacy_test_basic_07.json
+++ b/test/data/legacy_test_basic_07.json
--- a/test/data/legacy_test_basic_08.expected
+++ b/test/data/legacy_test_basic_08.expected
--- a/test/data/legacy_test_basic_08.json
+++ b/test/data/legacy_test_basic_08.json
--- a/test/data/legacy_test_basic_09.expected
+++ b/test/data/legacy_test_basic_09.expected
--- a/test/data/legacy_test_basic_09.json
+++ b/test/data/legacy_test_basic_09.json
--- a/test/data/legacy_test_comment_00.expected
+++ b/test/data/legacy_test_comment_00.expected
--- a/test/data/legacy_test_comment_00.json
+++ b/test/data/legacy_test_comment_00.json
--- a/test/data/legacy_test_comment_01.expected
+++ b/test/data/legacy_test_comment_01.expected
--- a/test/data/legacy_test_comment_01.json
+++ b/test/data/legacy_test_comment_01.json
--- a/test/data/legacy_test_comment_02.expected
+++ b/test/data/legacy_test_comment_02.expected
--- a/test/data/legacy_test_comment_02.json
+++ b/test/data/legacy_test_comment_02.json
--- a/test/data/legacy_test_complex_01.expected
+++ b/test/data/legacy_test_complex_01.expected
--- a/test/data/legacy_test_complex_01.json
+++ b/test/data/legacy_test_complex_01.json
--- a/test/data/legacy_test_integer_01.expected
+++ b/test/data/legacy_test_integer_01.expected
--- a/test/data/legacy_test_integer_01.json
+++ b/test/data/legacy_test_integer_01.json
--- a/test/data/legacy_test_integer_02.expected
+++ b/test/data/legacy_test_integer_02.expected
--- a/test/data/legacy_test_integer_02.json
+++ b/test/data/legacy_test_integer_02.json
--- a/test/data/legacy_test_integer_03.expected
+++ b/test/data/legacy_test_integer_03.expected
--- a/test/data/legacy_test_integer_03.json
+++ b/test/data/legacy_test_integer_03.json
--- a/test/data/legacy_test_integer_04.expected
+++ b/test/data/legacy_test_integer_04.expected
--- a/test/data/legacy_test_integer_04.json
+++ b/test/data/legacy_test_integer_04.json
--- a/test/data/legacy_test_integer_05.expected
+++ b/test/data/legacy_test_integer_05.expected
--- a/test/data/legacy_test_integer_05.json
+++ b/test/data/legacy_test_integer_05.json
--- a/test/data/legacy_test_integer_06_64bits.expected
+++ b/test/data/legacy_test_integer_06_64bits.expected
--- a/test/data/legacy_test_integer_06_64bits.json
+++ b/test/data/legacy_test_integer_06_64bits.json
--- a/test/data/legacy_test_integer_07_64bits.expected
+++ b/test/data/legacy_test_integer_07_64bits.expected
--- a/test/data/legacy_test_integer_07_64bits.json
+++ b/test/data/legacy_test_integer_07_64bits.json
--- a/test/data/legacy_test_integer_08_64bits.expected
+++ b/test/data/legacy_test_integer_08_64bits.expected
--- a/test/data/legacy_test_integer_08_64bits.json
+++ b/test/data/legacy_test_integer_08_64bits.json
--- a/test/data/legacy_test_large_01.expected
+++ b/test/data/legacy_test_large_01.expected
--- a/test/data/legacy_test_large_01.json
+++ b/test/data/legacy_test_large_01.json
--- a/test/data/legacy_test_object_01.expected
+++ b/test/data/legacy_test_object_01.expected
--- a/test/data/legacy_test_object_01.json
+++ b/test/data/legacy_test_object_01.json
--- a/test/data/legacy_test_object_02.expected
+++ b/test/data/legacy_test_object_02.expected
--- a/test/data/legacy_test_object_02.json
+++ b/test/data/legacy_test_object_02.json
--- a/test/data/legacy_test_object_03.expected
+++ b/test/data/legacy_test_object_03.expected
--- a/test/data/legacy_test_object_03.json
+++ b/test/data/legacy_test_object_03.json
--- a/test/data/legacy_test_object_04.expected
+++ b/test/data/legacy_test_object_04.expected
--- a/test/data/legacy_test_object_04.json
+++ b/test/data/legacy_test_object_04.json
--- a/test/data/legacy_test_preserve_comment_01.expected
+++ b/test/data/legacy_test_preserve_comment_01.expected
--- a/test/data/legacy_test_preserve_comment_01.json
+++ b/test/data/legacy_test_preserve_comment_01.json
--- a/test/data/legacy_test_real_01.expected
+++ b/test/data/legacy_test_real_01.expected
--- a/test/data/legacy_test_real_01.json
+++ b/test/data/legacy_test_real_01.json
--- a/test/data/legacy_test_real_02.expected
+++ b/test/data/legacy_test_real_02.expected
--- a/test/data/legacy_test_real_02.json
+++ b/test/data/legacy_test_real_02.json
--- a/test/data/legacy_test_real_03.expected
+++ b/test/data/legacy_test_real_03.expected
--- a/test/data/legacy_test_real_03.json
+++ b/test/data/legacy_test_real_03.json
--- a/test/data/legacy_test_real_04.expected
+++ b/test/data/legacy_test_real_04.expected
--- a/test/data/legacy_test_real_04.json
+++ b/test/data/legacy_test_real_04.json
--- a/test/data/legacy_test_real_05.expected
+++ b/test/data/legacy_test_real_05.expected
--- a/test/data/legacy_test_real_05.json
+++ b/test/data/legacy_test_real_05.json
--- a/test/data/legacy_test_real_06.expected
+++ b/test/data/legacy_test_real_06.expected
--- a/test/data/legacy_test_real_06.json
+++ b/test/data/legacy_test_real_06.json
--- a/test/data/legacy_test_real_07.expected
+++ b/test/data/legacy_test_real_07.expected
--- a/test/data/legacy_test_real_07.json
+++ b/test/data/legacy_test_real_07.json
--- a/test/data/legacy_test_real_08.expected
+++ b/test/data/legacy_test_real_08.expected
--- a/test/data/legacy_test_real_08.json
+++ b/test/data/legacy_test_real_08.json
--- a/test/data/legacy_test_real_09.expected
+++ b/test/data/legacy_test_real_09.expected
--- a/test/data/legacy_test_real_09.json
+++ b/test/data/legacy_test_real_09.json
--- a/test/data/legacy_test_real_10.expected
+++ b/test/data/legacy_test_real_10.expected
--- a/test/data/legacy_test_real_10.json
+++ b/test/data/legacy_test_real_10.json
--- a/test/data/legacy_test_real_11.expected
+++ b/test/data/legacy_test_real_11.expected
--- a/test/data/legacy_test_real_11.json
+++ b/test/data/legacy_test_real_11.json
--- a/test/data/legacy_test_real_12.expected
+++ b/test/data/legacy_test_real_12.expected
--- a/test/data/legacy_test_real_12.json
+++ b/test/data/legacy_test_real_12.json
--- a/test/data/legacy_test_string_01.expected
+++ b/test/data/legacy_test_string_01.expected
--- a/test/data/legacy_test_string_01.json
+++ b/test/data/legacy_test_string_01.json
--- a/test/data/legacy_test_string_02.expected
+++ b/test/data/legacy_test_string_02.expected
--- a/test/data/legacy_test_string_02.json
+++ b/test/data/legacy_test_string_02.json
--- a/test/data/legacy_test_string_03.expected
+++ b/test/data/legacy_test_string_03.expected
--- a/Show More
+++ b/Show More