From 399d08c86cf5e2dc6709f71c9822343aa1784ecd Mon Sep 17 00:00:00 2001 From: Paul Dreik Date: Tue, 31 Dec 2019 20:31:45 +0100 Subject: [PATCH] use unique_ptr in class parsedjson (#417) * refactor parsedjson to use unique_ptr instead of owning raw pointer * fix a potential undefined behavior * output only first cpu in /proc/cpuinfo --- .drone.yml | 2 +- include/simdjson/parsedjson.h | 35 ++++---- include/simdjson/parsedjsoniterator.h | 4 +- scripts/checkperf.sh | 2 +- src/generic/stage1_find_marks.h | 4 +- src/generic/stringparsing.h | 2 +- src/parsedjson.cpp | 115 ++++++-------------------- 7 files changed, 49 insertions(+), 115 deletions(-) diff --git a/.drone.yml b/.drone.yml index 8c6ad18c..87980ca7 100644 --- a/.drone.yml +++ b/.drone.yml @@ -22,7 +22,7 @@ steps: image: gcc:8 environment: CHECKPERF_REPOSITORY: https://github.com/lemire/simdjson - commands: [ cat /proc/cpuinfo, make checkperf ] + commands: [ sed '/^$/Q' /proc/cpuinfo, make checkperf ] --- kind: pipeline name: x64-build diff --git a/include/simdjson/parsedjson.h b/include/simdjson/parsedjson.h index 4a80234a..30424646 100644 --- a/include/simdjson/parsedjson.h +++ b/include/simdjson/parsedjson.h @@ -5,6 +5,7 @@ #include "simdjson/simdjson.h" #include #include +#include #define JSON_VALUE_MASK 0xFFFFFFFFFFFFFF @@ -21,10 +22,14 @@ class ParsedJson { public: // create a ParsedJson container with zero capacity, call allocate_capacity to // allocate memory - ParsedJson(); - ~ParsedJson(); - ParsedJson(ParsedJson &&p); - ParsedJson &operator=(ParsedJson &&o); + ParsedJson()=default; + ~ParsedJson()=default; + + // this is a move only class + ParsedJson(ParsedJson &&p) = default; + ParsedJson(const ParsedJson &p) = delete; + ParsedJson &operator=(ParsedJson &&o) = default; + ParsedJson &operator=(const ParsedJson &o) = delete; // if needed, allocate memory so that the object is able to process JSON // documents having up to len bytes and max_depth "depth" @@ -77,7 +82,8 @@ public: really_inline void write_tape_s64(int64_t i) { write_tape(0, 'l'); - tape[current_loc++] = *(reinterpret_cast(&i)); + std::memcpy(&tape[current_loc], &i, sizeof(i)); + ++current_loc; } really_inline void write_tape_u64(uint64_t i) { @@ -113,27 +119,22 @@ public: uint32_t current_loc{0}; uint32_t n_structural_indexes{0}; - uint32_t *structural_indexes; + std::unique_ptr structural_indexes; + + std::unique_ptr tape; + std::unique_ptr containing_scope_offset; - uint64_t *tape; - uint32_t *containing_scope_offset; #ifdef SIMDJSON_USE_COMPUTED_GOTO - void **ret_address; + std::unique_ptr ret_address; #else - char *ret_address; + std::unique_ptr ret_address; #endif - uint8_t *string_buf; // should be at least byte_capacity + std::unique_ptr string_buf;// should be at least byte_capacity uint8_t *current_string_buf_loc; bool valid{false}; int error_code{simdjson::UNITIALIZED}; -private: - // we don't want the default constructor to be called - ParsedJson(const ParsedJson &p) = - delete; // we don't want the default constructor to be called - // we don't want the assignment to be called - ParsedJson &operator=(const ParsedJson &o) = delete; }; // dump bits low to high diff --git a/include/simdjson/parsedjsoniterator.h b/include/simdjson/parsedjsoniterator.h index 6cbf6f8a..707ef5cf 100644 --- a/include/simdjson/parsedjsoniterator.h +++ b/include/simdjson/parsedjsoniterator.h @@ -64,14 +64,14 @@ public: // within the string: get_string_length determines the true string length. inline const char *get_string() const { return reinterpret_cast( - pj->string_buf + (current_val & JSON_VALUE_MASK) + sizeof(uint32_t)); + pj->string_buf.get() + (current_val & JSON_VALUE_MASK) + sizeof(uint32_t)); } // return the length of the string in bytes inline uint32_t get_string_length() const { uint32_t answer; memcpy(&answer, - reinterpret_cast(pj->string_buf + + reinterpret_cast(pj->string_buf.get() + (current_val & JSON_VALUE_MASK)), sizeof(uint32_t)); return answer; diff --git a/scripts/checkperf.sh b/scripts/checkperf.sh index d348d566..8db885d4 100644 --- a/scripts/checkperf.sh +++ b/scripts/checkperf.sh @@ -3,7 +3,7 @@ set -e SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" -if [ -z "$CHECKPERF_REPOSITORY"]; then CHECKPERF_REPOSITORY=.; fi +if [ -z "$CHECKPERF_REPOSITORY" ]; then CHECKPERF_REPOSITORY=.; fi # Arguments: perfdiff.sh if [ -z "$1" ]; then reference_branch="master"; else reference_branch=$1; shift; fi diff --git a/src/generic/stage1_find_marks.h b/src/generic/stage1_find_marks.h index 839b0749..535d5516 100644 --- a/src/generic/stage1_find_marks.h +++ b/src/generic/stage1_find_marks.h @@ -371,7 +371,7 @@ int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &p return simdjson::CAPACITY; } utf8_checker utf8_checker{}; - json_structural_scanner scanner{pj.structural_indexes}; + json_structural_scanner scanner{pj.structural_indexes.get()}; scanner.scan(buf, len, utf8_checker); simdjson::ErrorValues error = scanner.detect_errors_on_eof(); @@ -379,7 +379,7 @@ int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &p return error; } - pj.n_structural_indexes = scanner.structural_indexes.tail - pj.structural_indexes; + pj.n_structural_indexes = scanner.structural_indexes.tail - pj.structural_indexes.get(); /* a valid JSON file cannot have zero structural indexes - we should have * found something */ if (unlikely(pj.n_structural_indexes == 0u)) { diff --git a/src/generic/stringparsing.h b/src/generic/stringparsing.h index b71584d8..fc13168b 100644 --- a/src/generic/stringparsing.h +++ b/src/generic/stringparsing.h @@ -73,7 +73,7 @@ WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf, UNUSED size_t len, ParsedJson &pj, UNUSED const uint32_t depth, UNUSED uint32_t offset) { - pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"'); + pj.write_tape(pj.current_string_buf_loc - pj.string_buf.get(), '"'); const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */ uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); const uint8_t *const start_of_string = dst; diff --git a/src/parsedjson.cpp b/src/parsedjson.cpp index c2d3f7de..91353bb3 100644 --- a/src/parsedjson.cpp +++ b/src/parsedjson.cpp @@ -2,58 +2,6 @@ #include "simdjson/jsonformatutils.h" namespace simdjson { -ParsedJson::ParsedJson() - : structural_indexes(nullptr), tape(nullptr), - containing_scope_offset(nullptr), ret_address(nullptr), - string_buf(nullptr), current_string_buf_loc(nullptr) {} - -ParsedJson::~ParsedJson() { deallocate(); } - -ParsedJson::ParsedJson(ParsedJson &&p) - : byte_capacity(p.byte_capacity), depth_capacity(p.depth_capacity), - tape_capacity(p.tape_capacity), string_capacity(p.string_capacity), - current_loc(p.current_loc), n_structural_indexes(p.n_structural_indexes), - structural_indexes(p.structural_indexes), tape(p.tape), - containing_scope_offset(p.containing_scope_offset), - ret_address(p.ret_address), string_buf(p.string_buf), - current_string_buf_loc(p.current_string_buf_loc), valid(p.valid) { - p.structural_indexes = nullptr; - p.tape = nullptr; - p.containing_scope_offset = nullptr; - p.ret_address = nullptr; - p.string_buf = nullptr; - p.current_string_buf_loc = nullptr; -} - -ParsedJson &ParsedJson::operator=(ParsedJson &&p) { - byte_capacity = p.byte_capacity; - p.byte_capacity = 0; - depth_capacity = p.depth_capacity; - p.depth_capacity = 0; - tape_capacity = p.tape_capacity; - p.tape_capacity = 0; - string_capacity = p.string_capacity; - p.string_capacity = 0; - current_loc = p.current_loc; - p.current_loc = 0; - n_structural_indexes = p.n_structural_indexes; - p.n_structural_indexes = 0; - structural_indexes = p.structural_indexes; - p.structural_indexes = nullptr; - tape = p.tape; - p.tape = nullptr; - containing_scope_offset = p.containing_scope_offset; - p.containing_scope_offset = nullptr; - ret_address = p.ret_address; - p.ret_address = nullptr; - string_buf = p.string_buf; - p.string_buf = nullptr; - current_string_buf_loc = p.current_string_buf_loc; - p.current_string_buf_loc = nullptr; - valid = p.valid; - p.valid = false; - return *this; -} WARN_UNUSED bool ParsedJson::allocate_capacity(size_t len, size_t max_depth) { @@ -74,7 +22,8 @@ bool ParsedJson::allocate_capacity(size_t len, size_t max_depth) { byte_capacity = 0; // will only set it to len after allocations are a success n_structural_indexes = 0; uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7; - structural_indexes = new (std::nothrow) uint32_t[max_structures]; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures]); + // a pathological input like "[[[[..." would generate len tape elements, so // need a capacity of at least len + 1, but it is also possible to do // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" @@ -84,24 +33,19 @@ bool ParsedJson::allocate_capacity(size_t len, size_t max_depth) { // a document with only zero-length strings... could have len/3 string // and we would need len/3 * 5 bytes on the string buffer size_t local_string_capacity = ROUNDUP_N(5 * len / 3 + 32, 64); - string_buf = new (std::nothrow) uint8_t[local_string_capacity]; - tape = new (std::nothrow) uint64_t[local_tape_capacity]; - containing_scope_offset = new (std::nothrow) uint32_t[max_depth]; + string_buf.reset( new (std::nothrow) uint8_t[local_string_capacity]); + tape.reset(new (std::nothrow) uint64_t[local_tape_capacity]); + containing_scope_offset.reset(new (std::nothrow) uint32_t[max_depth]); #ifdef SIMDJSON_USE_COMPUTED_GOTO - ret_address = new (std::nothrow) void *[max_depth]; + //ret_address = new (std::nothrow) void *[max_depth]; + ret_address.reset(new (std::nothrow) void *[max_depth]); #else - ret_address = new (std::nothrow) char[max_depth]; + ret_address.reset(new (std::nothrow) char[max_depth]); #endif - if ((string_buf == nullptr) || (tape == nullptr) || - (containing_scope_offset == nullptr) || (ret_address == nullptr) || - (structural_indexes == nullptr)) { + if (!string_buf || !tape || + !containing_scope_offset || !ret_address || + !structural_indexes) { std::cerr << "Could not allocate memory" << std::endl; - delete[] ret_address; - delete[] containing_scope_offset; - delete[] tape; - delete[] string_buf; - delete[] structural_indexes; - return false; } /* @@ -131,16 +75,16 @@ void ParsedJson::deallocate() { depth_capacity = 0; tape_capacity = 0; string_capacity = 0; - delete[] ret_address; - delete[] containing_scope_offset; - delete[] tape; - delete[] string_buf; - delete[] structural_indexes; + ret_address.reset(); + containing_scope_offset.reset(); + tape.reset(); + string_buf.reset(); + structural_indexes.reset(); valid = false; } void ParsedJson::init() { - current_string_buf_loc = string_buf; + current_string_buf_loc = string_buf.get(); current_loc = 0; valid = false; } @@ -168,8 +112,8 @@ bool ParsedJson::print_json(std::ostream &os) const { return false; } tape_idx++; - bool *in_object = new bool[depth_capacity]; - auto *in_object_idx = new size_t[depth_capacity]; + std::unique_ptr in_object(new bool[depth_capacity]); + std::unique_ptr in_object_idx(new size_t[depth_capacity]); int depth = 1; // only root at level 0 in_object_idx[depth] = 0; in_object[depth] = false; @@ -195,32 +139,26 @@ bool ParsedJson::print_json(std::ostream &os) const { switch (type) { case '"': // we have a string os << '"'; - memcpy(&string_length, string_buf + payload, sizeof(uint32_t)); + memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); print_with_escapes( - (const unsigned char *)(string_buf + payload + sizeof(uint32_t)), + (const unsigned char *)(string_buf.get() + payload + sizeof(uint32_t)), os, string_length); os << '"'; break; case 'l': // we have a long int if (tape_idx + 1 >= how_many) { - delete[] in_object; - delete[] in_object_idx; return false; } os << static_cast(tape[++tape_idx]); break; case 'u': if (tape_idx + 1 >= how_many) { - delete[] in_object; - delete[] in_object_idx; return false; } os << tape[++tape_idx]; break; case 'd': // we have a double if (tape_idx + 1 >= how_many) { - delete[] in_object; - delete[] in_object_idx; return false; } double answer; @@ -258,18 +196,12 @@ bool ParsedJson::print_json(std::ostream &os) const { break; case 'r': // we start and end with the root node fprintf(stderr, "should we be hitting the root node?\n"); - delete[] in_object; - delete[] in_object_idx; return false; default: fprintf(stderr, "bug %c\n", type); - delete[] in_object; - delete[] in_object_idx; return false; } } - delete[] in_object; - delete[] in_object_idx; return true; } @@ -301,9 +233,10 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) const { switch (type) { case '"': // we have a string os << "string \""; - memcpy(&string_length, string_buf + payload, sizeof(uint32_t)); + memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); print_with_escapes( - (const unsigned char *)(string_buf + payload + sizeof(uint32_t)), + (const unsigned char *)(string_buf.get() + payload + sizeof(uint32_t)), + os, string_length); os << '"'; os << '\n';