Return error codes from parse_number

2020-08-12 13:06:33 -07:00 · 2020-08-12 13:06:33 -07:00 · 9475b947f5
parent 18564f1ae2
commit 9475b947f5
2 changed files with 29 additions and 28 deletions
--- a/src/generic/stage2/numberparsing.h
+++ b/src/generic/stage2/numberparsing.h
@ -7,12 +7,12 @@ namespace stage2 {
 namespace numberparsing {

 #ifdef JSON_TEST_NUMBERS
-#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), false)
+#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR)
 #define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), writer.append_s64((VALUE)))
 #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), writer.append_u64((VALUE)))
 #define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), writer.append_double((VALUE)))
 #else
-#define INVALID_NUMBER(SRC) (false)
+#define INVALID_NUMBER(SRC) (NUMBER_ERROR)
 #define WRITE_INTEGER(VALUE, SRC, WRITER) writer.append_s64((VALUE))
 #define WRITE_UNSIGNED(VALUE, SRC, WRITER) writer.append_u64((VALUE))
 #define WRITE_DOUBLE(VALUE, SRC, WRITER) writer.append_double((VALUE))
@ -252,11 +252,11 @@ simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) {
 }

 template<typename W>
-bool slow_float_parsing(SIMDJSON_UNUSED const uint8_t * src, W writer) {
+error_code slow_float_parsing(SIMDJSON_UNUSED const uint8_t * src, W writer) {
  double d;
  if (parse_float_strtod(src, &d)) {
    WRITE_DOUBLE(d, src, writer);
-    return true;
+    return SUCCESS;
  }
  return INVALID_NUMBER(src);
 }
@ -273,7 +273,7 @@ simdjson_really_inline bool parse_digit(const uint8_t c, I &i) {
  return true;
 }

-simdjson_really_inline bool parse_decimal(SIMDJSON_UNUSED const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) {
+simdjson_really_inline error_code parse_decimal(SIMDJSON_UNUSED const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) {
  // we continue with the fiction that we have an integer. If the
  // floating point number is representable as x * 10^z for some integer
  // z that fits in 53 bits, then we will be able to convert back the
@ -296,10 +296,10 @@ simdjson_really_inline bool parse_decimal(SIMDJSON_UNUSED const uint8_t *const s
  if (exponent == 0) {
    return INVALID_NUMBER(src);
  }
-  return true;
+  return SUCCESS;
 }

-simdjson_really_inline bool parse_exponent(SIMDJSON_UNUSED const uint8_t *const src, const uint8_t *&p, int64_t &exponent) {
+simdjson_really_inline error_code parse_exponent(SIMDJSON_UNUSED const uint8_t *const src, const uint8_t *&p, int64_t &exponent) {
  // Exp Sign: -123.456e[-]78
  bool neg_exp = ('-' == *p);
  if (neg_exp || '+' == *p) { p++; } // Skip + as well
@ -347,11 +347,11 @@ simdjson_really_inline bool parse_exponent(SIMDJSON_UNUSED const uint8_t *const
  // is bounded in magnitude by the size of the JSON input, we are fine in this universe.
  // To sum it up: the next line should never overflow.
  exponent += (neg_exp ? -exp_number : exp_number);
-  return true;
+  return SUCCESS;
 }

 template<typename W>
-simdjson_really_inline bool write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, int digit_count, int64_t exponent, W &writer) {
+simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, int digit_count, int64_t exponent, W &writer) {
  // If we frequently had to deal with long strings of digits,
  // we could extend our code by using a 128-bit integer instead
  // of a 64-bit integer. However, this is uncommon in practice.
@ -373,11 +373,11 @@ simdjson_really_inline bool write_float(const uint8_t *const src, bool negative,
      // 10000000000000000000000000000000000000000000e+308
      // 3.1415926535897932384626433832795028841971693993751
      //
-      bool success = slow_float_parsing(src, writer);
+      error_code error = slow_float_parsing(src, writer);
      // The number was already written, but we made a copy of the writer when we passed it to the
      // slow_float_parsing() function, so we have to skip those tape spots now that we've returned
      writer.skip_double();
-      return success;
+      return error;
    }
  }
  // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other
@ -386,11 +386,11 @@ simdjson_really_inline bool write_float(const uint8_t *const src, bool negative,
  if (simdjson_unlikely(exponent < FASTFLOAT_SMALLEST_POWER) || (exponent > FASTFLOAT_LARGEST_POWER)) {
    // this is almost never going to get called!!!
    // we start anew, going slowly!!!
-    bool success = slow_float_parsing(src, writer);
+    error_code error = slow_float_parsing(src, writer);
    // The number was already written, but we made a copy of the writer when we passed it to the
    // slow_float_parsing() function, so we have to skip those tape spots now that we've returned
    writer.skip_double();
-    return success;
+    return error;
  }
  bool success = true;
  double d = compute_float_64(exponent, i, negative, &success);
@ -399,16 +399,16 @@ simdjson_really_inline bool write_float(const uint8_t *const src, bool negative,
    if (!parse_float_strtod(src, &d)) { return INVALID_NUMBER(src); }
  }
  WRITE_DOUBLE(d, src, writer);
-  return true;
+  return SUCCESS;
 }

 // for performance analysis, it is sometimes useful to skip parsing
 #ifdef SIMDJSON_SKIPNUMBERPARSING

 template<typename W>
-simdjson_really_inline bool parse_number(const uint8_t *const, W &writer) {
+simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) {
  writer.append_s64(0);        // always write zero
-  return true;                 // always succeeds
+  return SUCCESS;              // always succeeds
 }

 #else
@ -423,7 +423,7 @@ simdjson_really_inline bool parse_number(const uint8_t *const, W &writer) {
 //
 // Our objective is accurate parsing (ULP of 0) at high speed.
 template<typename W>
-simdjson_really_inline bool parse_number(const uint8_t *const src, W &writer) {
+simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) {

  //
  // Check for minus sign
@ -451,17 +451,19 @@ simdjson_really_inline bool parse_number(const uint8_t *const src, W &writer) {
  if ('.' == *p) {
    is_float = true;
    ++p;
-    if (!parse_decimal(src, p, i, exponent)) { return false; }
+    SIMDJSON_TRY( parse_decimal(src, p, i, exponent) );
    digit_count = int(p - start_digits); // used later to guard against overflows
  }
  if (('e' == *p) || ('E' == *p)) {
    is_float = true;
    ++p;
-    if (!parse_exponent(src, p, exponent)) { return false; }
+    SIMDJSON_TRY( parse_exponent(src, p, exponent) );
  }
  if (is_float) {
    const bool clean_end = is_structural_or_whitespace(*p);
-    return write_float(src, negative, i, start_digits, digit_count, exponent, writer) && clean_end;
+    SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) );
+    if (!clean_end) { return INVALID_NUMBER(src); }
+    return SUCCESS;
  }

  // The longest negative 64-bit number is 19 digits.
@ -470,13 +472,12 @@ simdjson_really_inline bool parse_number(const uint8_t *const src, W &writer) {
  int longest_digit_count = negative ? 19 : 20;
  if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); }
  if (digit_count == longest_digit_count) {
-    if(negative) {
+    if (negative) {
      // Anything negative above INT64_MAX+1 is invalid
-      if (i > uint64_t(INT64_MAX)+1) {
-        return INVALID_NUMBER(src); 
-      }
+      if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src);  }
      WRITE_INTEGER(~i+1, src, writer);
-      return is_structural_or_whitespace(*p);
+      if (!is_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); }
+      return SUCCESS;
    // Positive overflow check:
    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
    //   biggest uint64_t.
@ -498,7 +499,8 @@ simdjson_really_inline bool parse_number(const uint8_t *const src, W &writer) {
  } else {
    WRITE_INTEGER(negative ? (~i+1) : i, src, writer);
  }
-  return is_structural_or_whitespace(*p);
+  if (!is_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); }
+  return SUCCESS;
 }

 // SAX functions
--- a/src/generic/stage2/tape_builder.h
+++ b/src/generic/stage2/tape_builder.h
@ -160,8 +160,7 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_root_

 SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept {
  iter.log_value("number");
-  if (!numberparsing::parse_number(value, tape)) { iter.log_error("Invalid number"); return NUMBER_ERROR; }
-  return SUCCESS;
+  return numberparsing::parse_number(value, tape);
 }

 SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept {