Move inline functions out of class definition for templating

2019-11-13 20:53:35 -08:00 · 2019-11-13 20:53:35 -08:00 · 708f4a094d
parent 2704b73399
commit 708f4a094d
1 changed files with 225 additions and 143 deletions
--- a/src/generic/stage1_find_marks.h
+++ b/src/generic/stage1_find_marks.h
@ -83,36 +83,7 @@ public:
  // indicate whether we end an iteration on an odd-length sequence of
  // backslashes, which modifies our subsequent search for odd-length
  // sequences of backslashes in an obvious way.
-  really_inline uint64_t follows_odd_sequence_of(const uint64_t match, uint64_t &overflow) {
-    const uint64_t even_bits = 0x5555555555555555ULL;
-    const uint64_t odd_bits = ~even_bits;
-    uint64_t start_edges = match & ~(match << 1);
-    /* flip lowest if we have an odd-length run at the end of the prior
-    * iteration */
-    uint64_t even_start_mask = even_bits ^ overflow;
-    uint64_t even_starts = start_edges & even_start_mask;
-    uint64_t odd_starts = start_edges & ~even_start_mask;
-    uint64_t even_carries = match + even_starts;
-
-    uint64_t odd_carries;
-    /* must record the carry-out of our odd-carries out of bit 63; this
-    * indicates whether the sense of any edge going to the next iteration
-    * should be flipped */
-    bool new_overflow = add_overflow(match, odd_starts, &odd_carries);
-
-    odd_carries |= overflow; /* push in bit zero as a
-                                * potential end if we had an
-                                * odd-numbered run at the
-                                * end of the previous
-                                * iteration */
-    overflow = new_overflow ? 0x1ULL : 0x0ULL;
-    uint64_t even_carry_ends = even_carries & ~match;
-    uint64_t odd_carry_ends = odd_carries & ~match;
-    uint64_t even_start_odd_end = even_carry_ends & odd_bits;
-    uint64_t odd_start_even_end = odd_carry_ends & even_bits;
-    uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
-    return odd_ends;
-  }
+  really_inline uint64_t follows_odd_sequence_of(const uint64_t match, uint64_t &overflow);

  //
  // Check if the current character immediately follows a matching character.
@ -121,11 +92,7 @@ public:
  //
  //     const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash);
  //
-  really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) {
-    const uint64_t result = match << 1 | overflow;
-    overflow = match >> 63;
-    return result;
-  }
+  really_inline uint64_t follows(const uint64_t match, uint64_t &overflow);

  //
  // Check if the current character follows a matching character, with possible "filler" between.
@ -133,22 +100,9 @@ public:
  //
  //     in.eq('}') & follows(in.eq('['), in.eq(' '), prev_empty_array) // { <whitespace>* }
  //
-  really_inline uint64_t follows(const uint64_t match, const uint64_t filler, uint64_t &overflow) {
-    uint64_t follows_match = follows(match, overflow);
-    uint64_t result;
-    overflow |= add_overflow(follows_match, filler, &result);
-    return result;
-  }
+  really_inline uint64_t follows(const uint64_t match, const uint64_t filler, uint64_t &overflow);

-  really_inline ErrorValues detect_errors_on_eof() {
-    if (prev_in_string) {
-      return UNCLOSED_STRING;
-    }
-    if (unescaped_chars_error) {
-      return UNESCAPED_CHARS;
-    }
-    return SUCCESS;
-  }
+  really_inline ErrorValues detect_errors_on_eof();

  //
  // Return a mask of all string characters plus end quotes.
@ -158,28 +112,9 @@ public:
  //
  // Backslash sequences outside of quotes will be detected in stage 2.
  //
-  really_inline uint64_t find_strings(const simd::simd8x64<uint8_t> in) {
-    const uint64_t backslash = in.eq('\\');
-    const uint64_t escaped = follows_odd_sequence_of(backslash, prev_escaped);
-    const uint64_t quote = in.eq('"') & ~escaped;
-    // prefix_xor flips on bits inside the string (and flips off the end quote).
-    const uint64_t in_string = prefix_xor(quote) ^ prev_in_string;
-    /* right shift of a signed value expected to be well-defined and standard
-    * compliant as of C++20,
-    * John Regher from Utah U. says this is fine code */
-    prev_in_string = static_cast<uint64_t>(static_cast<int64_t>(in_string) >> 63);
-    // Use ^ to turn the beginning quote off, and the end quote on.
-    return in_string ^ quote;
-  }
+  really_inline uint64_t find_strings(const simd::simd8x64<uint8_t> in);

-  really_inline uint64_t invalid_string_bytes(const uint64_t unescaped, const uint64_t quote_mask) {
-    /* All Unicode characters may be placed within the
-    * quotation marks, except for the characters that MUST be escaped:
-    * quotation mark, reverse solidus, and the control characters (U+0000
-    * through U+001F).
-    * https://tools.ietf.org/html/rfc8259 */
-    return quote_mask & unescaped;
-  }
+  really_inline uint64_t invalid_string_bytes(const uint64_t unescaped, const uint64_t quote_mask);

  //
  // Determine which characters are *structural*:
@ -197,20 +132,7 @@ public:
  // contents of a string the same as content outside. Errors and structurals inside the string or on
  // the trailing quote will need to be removed later when the correct string information is known.
  //
-  really_inline uint64_t find_potential_structurals(const simd::simd8x64<uint8_t> in) {
-    // These use SIMD so let's kick them off before running the regular 64-bit stuff ...
-    uint64_t whitespace, op;
-    find_whitespace_and_operators(in, whitespace, op);
-
-    // Detect the start of a run of primitive characters. Includes numbers, booleans, and strings (").
-    // Everything except whitespace, braces, colon and comma.
-    const uint64_t primitive = ~(op | whitespace);
-    const uint64_t follows_primitive = follows(primitive, prev_primitive);
-    const uint64_t start_primitive = primitive & ~follows_primitive;
-
-    // Return final structurals
-    return op | start_primitive;
-  }
+  really_inline uint64_t find_potential_structurals(const simd::simd8x64<uint8_t> in);

  //
  // Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes.
@ -231,67 +153,227 @@ public:
  // available capacity with just one input. Running 2 at a time seems to give the CPU a good enough
  // workout.
  //
-  really_inline void scan_step(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker) {
-    //
-    // Load up all 128 bytes into SIMD registers
-    //
-    simd::simd8x64<uint8_t> in_1(buf);
-    simd::simd8x64<uint8_t> in_2(buf+64);
-
-    //
-    // Find the strings and potential structurals (operators / primitives).
-    //
-    // This will include false structurals that are *inside* strings--we'll filter strings out
-    // before we return.
-    //
-    uint64_t string_1 = this->find_strings(in_1);
-    uint64_t structurals_1 = this->find_potential_structurals(in_1);
-    uint64_t string_2 = this->find_strings(in_2);
-    uint64_t structurals_2 = this->find_potential_structurals(in_2);
-
-    //
-    // Do miscellaneous work while the processor is busy calculating strings and structurals.
-    //
-    // After that, weed out structurals that are inside strings and find invalid string characters.
-    //
-    uint64_t unescaped_1 = in_1.lteq(0x1F);
-    utf8_checker.check_next_input(in_1);
-    this->structural_indexes.write_indexes(idx-64, prev_structurals); // Output *last* iteration's structurals to ParsedJson
-    this->prev_structurals = structurals_1 & ~string_1;
-    this->unescaped_chars_error |= unescaped_1 & string_1;
-
-    uint64_t unescaped_2 = in_2.lteq(0x1F);
-    utf8_checker.check_next_input(in_2);
-    this->structural_indexes.write_indexes(idx, prev_structurals); // Output *last* iteration's structurals to ParsedJson
-    this->prev_structurals = structurals_2 & ~string_2;
-    this->unescaped_chars_error |= unescaped_2 & string_2;
-  }
-
-  really_inline void scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker) {
-    size_t lenminusstep = len < STEP_SIZE ? 0 : len - STEP_SIZE;
-    size_t idx = 0;
-
-    for (; idx < lenminusstep; idx += STEP_SIZE) {
-      this->scan_step(&buf[idx], idx, utf8_checker);
-    }
-
-    /* If we have a final chunk of less than 64 bytes, pad it to 64 with
-    * spaces  before processing it (otherwise, we risk invalidating the UTF-8
-    * checks). */
-    if (likely(idx < len)) {
-      uint8_t tmp_buf[STEP_SIZE];
-      memset(tmp_buf, 0x20, STEP_SIZE);
-      memcpy(tmp_buf, buf + idx, len - idx);
-      this->scan_step(&tmp_buf[0], idx, utf8_checker);
-      idx += STEP_SIZE;
-    }
-
-    /* finally, flatten out the remaining structurals from the last iteration */
-    this->structural_indexes.write_indexes(idx-64, this->prev_structurals);
-  }
+  really_inline void scan_step(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker);

+  really_inline void scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker);
 };

+// return a bitvector indicating where we have characters that end an odd-length
+// sequence of backslashes (and thus change the behavior of the next character
+// to follow). A even-length sequence of backslashes, and, for that matter, the
+// largest even-length prefix of our odd-length sequence of backslashes, simply
+// modify the behavior of the backslashes themselves.
+// We also update the prev_iter_ends_odd_backslash reference parameter to
+// indicate whether we end an iteration on an odd-length sequence of
+// backslashes, which modifies our subsequent search for odd-length
+// sequences of backslashes in an obvious way.
+really_inline uint64_t json_structural_scanner::follows_odd_sequence_of(const uint64_t match, uint64_t &overflow) {
+  const uint64_t even_bits = 0x5555555555555555ULL;
+  const uint64_t odd_bits = ~even_bits;
+  uint64_t start_edges = match & ~(match << 1);
+  /* flip lowest if we have an odd-length run at the end of the prior
+  * iteration */
+  uint64_t even_start_mask = even_bits ^ overflow;
+  uint64_t even_starts = start_edges & even_start_mask;
+  uint64_t odd_starts = start_edges & ~even_start_mask;
+  uint64_t even_carries = match + even_starts;
+
+  uint64_t odd_carries;
+  /* must record the carry-out of our odd-carries out of bit 63; this
+  * indicates whether the sense of any edge going to the next iteration
+  * should be flipped */
+  bool new_overflow = add_overflow(match, odd_starts, &odd_carries);
+
+  odd_carries |= overflow; /* push in bit zero as a
+                              * potential end if we had an
+                              * odd-numbered run at the
+                              * end of the previous
+                              * iteration */
+  overflow = new_overflow ? 0x1ULL : 0x0ULL;
+  uint64_t even_carry_ends = even_carries & ~match;
+  uint64_t odd_carry_ends = odd_carries & ~match;
+  uint64_t even_start_odd_end = even_carry_ends & odd_bits;
+  uint64_t odd_start_even_end = odd_carry_ends & even_bits;
+  uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
+  return odd_ends;
+}
+
+//
+// Check if the current character immediately follows a matching character.
+//
+// For example, this checks for quotes with backslashes in front of them:
+//
+//     const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash);
+//
+really_inline uint64_t json_structural_scanner::follows(const uint64_t match, uint64_t &overflow) {
+  const uint64_t result = match << 1 | overflow;
+  overflow = match >> 63;
+  return result;
+}
+
+//
+// Check if the current character follows a matching character, with possible "filler" between.
+// For example, this checks for empty curly braces, e.g. 
+//
+//     in.eq('}') & follows(in.eq('['), in.eq(' '), prev_empty_array) // { <whitespace>* }
+//
+really_inline uint64_t json_structural_scanner::follows(const uint64_t match, const uint64_t filler, uint64_t &overflow) {
+  uint64_t follows_match = follows(match, overflow);
+  uint64_t result;
+  overflow |= add_overflow(follows_match, filler, &result);
+  return result;
+}
+
+really_inline ErrorValues json_structural_scanner::detect_errors_on_eof() {
+  if (prev_in_string) {
+    return UNCLOSED_STRING;
+  }
+  if (unescaped_chars_error) {
+    return UNESCAPED_CHARS;
+  }
+  return SUCCESS;
+}
+
+//
+// Return a mask of all string characters plus end quotes.
+//
+// prev_escaped is overflow saying whether the next character is escaped.
+// prev_in_string is overflow saying whether we're still in a string.
+//
+// Backslash sequences outside of quotes will be detected in stage 2.
+//
+really_inline uint64_t json_structural_scanner::find_strings(const simd::simd8x64<uint8_t> in) {
+  const uint64_t backslash = in.eq('\\');
+  const uint64_t escaped = follows_odd_sequence_of(backslash, prev_escaped);
+  const uint64_t quote = in.eq('"') & ~escaped;
+  // prefix_xor flips on bits inside the string (and flips off the end quote).
+  const uint64_t in_string = prefix_xor(quote) ^ prev_in_string;
+  /* right shift of a signed value expected to be well-defined and standard
+  * compliant as of C++20,
+  * John Regher from Utah U. says this is fine code */
+  prev_in_string = static_cast<uint64_t>(static_cast<int64_t>(in_string) >> 63);
+  // Use ^ to turn the beginning quote off, and the end quote on.
+  return in_string ^ quote;
+}
+
+really_inline uint64_t json_structural_scanner::invalid_string_bytes(const uint64_t unescaped, const uint64_t quote_mask) {
+  /* All Unicode characters may be placed within the
+  * quotation marks, except for the characters that MUST be escaped:
+  * quotation mark, reverse solidus, and the control characters (U+0000
+  * through U+001F).
+  * https://tools.ietf.org/html/rfc8259 */
+  return quote_mask & unescaped;
+}
+
+//
+// Determine which characters are *structural*:
+// - braces: [] and {}
+// - the start of primitives (123, true, false, null)
+// - the start of invalid non-whitespace (+, &, ture, UTF-8)
+//
+// Also detects value sequence errors:
+// - two values with no separator between ("hello" "world")
+// - separators with no values ([1,] [1,,]and [,2])
+//
+// This method will find all of the above whether it is in a string or not.
+//
+// To reduce dependency on the expensive "what is in a string" computation, this method treats the
+// contents of a string the same as content outside. Errors and structurals inside the string or on
+// the trailing quote will need to be removed later when the correct string information is known.
+//
+really_inline uint64_t json_structural_scanner::find_potential_structurals(const simd::simd8x64<uint8_t> in) {
+  // These use SIMD so let's kick them off before running the regular 64-bit stuff ...
+  uint64_t whitespace, op;
+  find_whitespace_and_operators(in, whitespace, op);
+
+  // Detect the start of a run of primitive characters. Includes numbers, booleans, and strings (").
+  // Everything except whitespace, braces, colon and comma.
+  const uint64_t primitive = ~(op | whitespace);
+  const uint64_t follows_primitive = follows(primitive, prev_primitive);
+  const uint64_t start_primitive = primitive & ~follows_primitive;
+
+  // Return final structurals
+  return op | start_primitive;
+}
+
+//
+// Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes.
+//
+// PERF NOTES:
+// We pipe 2 inputs through these stages:
+// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load
+//    2 inputs' worth at once so that by the time step 2 is looking for them input, it's available.
+// 2. Scan the JSON for critical data: strings, primitives and operators. This is the critical path.
+//    The output of step 1 depends entirely on this information. These functions don't quite use
+//    up enough CPU: the second half of the functions is highly serial, only using 1 execution core
+//    at a time. The second input's scans has some dependency on the first ones finishing it, but
+//    they can make a lot of progress before they need that information.
+// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that
+//    to finish: utf-8 checks and generating the output from the last iteration.
+// 
+// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all
+// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough
+// workout.
+//
+really_inline void json_structural_scanner::scan_step(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker) {
+  //
+  // Load up all 128 bytes into SIMD registers
+  //
+  simd::simd8x64<uint8_t> in_1(buf);
+  simd::simd8x64<uint8_t> in_2(buf+64);
+
+  //
+  // Find the strings and potential structurals (operators / primitives).
+  //
+  // This will include false structurals that are *inside* strings--we'll filter strings out
+  // before we return.
+  //
+  uint64_t string_1 = this->find_strings(in_1);
+  uint64_t structurals_1 = this->find_potential_structurals(in_1);
+  uint64_t string_2 = this->find_strings(in_2);
+  uint64_t structurals_2 = this->find_potential_structurals(in_2);
+
+  //
+  // Do miscellaneous work while the processor is busy calculating strings and structurals.
+  //
+  // After that, weed out structurals that are inside strings and find invalid string characters.
+  //
+  uint64_t unescaped_1 = in_1.lteq(0x1F);
+  utf8_checker.check_next_input(in_1);
+  this->structural_indexes.write_indexes(idx-64, prev_structurals); // Output *last* iteration's structurals to ParsedJson
+  this->prev_structurals = structurals_1 & ~string_1;
+  this->unescaped_chars_error |= unescaped_1 & string_1;
+
+  uint64_t unescaped_2 = in_2.lteq(0x1F);
+  utf8_checker.check_next_input(in_2);
+  this->structural_indexes.write_indexes(idx, prev_structurals); // Output *last* iteration's structurals to ParsedJson
+  this->prev_structurals = structurals_2 & ~string_2;
+  this->unescaped_chars_error |= unescaped_2 & string_2;
+}
+
+really_inline void json_structural_scanner::scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker) {
+  size_t lenminusstep = len < STEP_SIZE ? 0 : len - STEP_SIZE;
+  size_t idx = 0;
+
+  for (; idx < lenminusstep; idx += STEP_SIZE) {
+    this->scan_step(&buf[idx], idx, utf8_checker);
+  }
+
+  /* If we have a final chunk of less than 64 bytes, pad it to 64 with
+  * spaces  before processing it (otherwise, we risk invalidating the UTF-8
+  * checks). */
+  if (likely(idx < len)) {
+    uint8_t tmp_buf[STEP_SIZE];
+    memset(tmp_buf, 0x20, STEP_SIZE);
+    memcpy(tmp_buf, buf + idx, len - idx);
+    this->scan_step(&tmp_buf[0], idx, utf8_checker);
+    idx += STEP_SIZE;
+  }
+
+  /* finally, flatten out the remaining structurals from the last iteration */
+  this->structural_indexes.write_indexes(idx-64, this->prev_structurals);
+}
+
 int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj, bool streaming) {
  if (unlikely(len > pj.byte_capacity)) {
    std::cerr << "Your ParsedJson object only supports documents up to "