use unique_ptr in class parsedjson (#417)
* refactor parsedjson to use unique_ptr instead of owning raw pointer * fix a potential undefined behavior * output only first cpu in /proc/cpuinfo
This commit is contained in:
parent
6f799435b6
commit
399d08c86c
|
@ -22,7 +22,7 @@ steps:
|
|||
image: gcc:8
|
||||
environment:
|
||||
CHECKPERF_REPOSITORY: https://github.com/lemire/simdjson
|
||||
commands: [ cat /proc/cpuinfo, make checkperf ]
|
||||
commands: [ sed '/^$/Q' /proc/cpuinfo, make checkperf ]
|
||||
---
|
||||
kind: pipeline
|
||||
name: x64-build
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include "simdjson/simdjson.h"
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
#define JSON_VALUE_MASK 0xFFFFFFFFFFFFFF
|
||||
|
||||
|
@ -21,10 +22,14 @@ class ParsedJson {
|
|||
public:
|
||||
// create a ParsedJson container with zero capacity, call allocate_capacity to
|
||||
// allocate memory
|
||||
ParsedJson();
|
||||
~ParsedJson();
|
||||
ParsedJson(ParsedJson &&p);
|
||||
ParsedJson &operator=(ParsedJson &&o);
|
||||
ParsedJson()=default;
|
||||
~ParsedJson()=default;
|
||||
|
||||
// this is a move only class
|
||||
ParsedJson(ParsedJson &&p) = default;
|
||||
ParsedJson(const ParsedJson &p) = delete;
|
||||
ParsedJson &operator=(ParsedJson &&o) = default;
|
||||
ParsedJson &operator=(const ParsedJson &o) = delete;
|
||||
|
||||
// if needed, allocate memory so that the object is able to process JSON
|
||||
// documents having up to len bytes and max_depth "depth"
|
||||
|
@ -77,7 +82,8 @@ public:
|
|||
|
||||
really_inline void write_tape_s64(int64_t i) {
|
||||
write_tape(0, 'l');
|
||||
tape[current_loc++] = *(reinterpret_cast<uint64_t *>(&i));
|
||||
std::memcpy(&tape[current_loc], &i, sizeof(i));
|
||||
++current_loc;
|
||||
}
|
||||
|
||||
really_inline void write_tape_u64(uint64_t i) {
|
||||
|
@ -113,27 +119,22 @@ public:
|
|||
uint32_t current_loc{0};
|
||||
uint32_t n_structural_indexes{0};
|
||||
|
||||
uint32_t *structural_indexes;
|
||||
std::unique_ptr<uint32_t[]> structural_indexes;
|
||||
|
||||
std::unique_ptr<uint64_t[]> tape;
|
||||
std::unique_ptr<uint32_t[]> containing_scope_offset;
|
||||
|
||||
uint64_t *tape;
|
||||
uint32_t *containing_scope_offset;
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
void **ret_address;
|
||||
std::unique_ptr<void*[]> ret_address;
|
||||
#else
|
||||
char *ret_address;
|
||||
std::unique_ptr<char[]> ret_address;
|
||||
#endif
|
||||
|
||||
uint8_t *string_buf; // should be at least byte_capacity
|
||||
std::unique_ptr<uint8_t[]> string_buf;// should be at least byte_capacity
|
||||
uint8_t *current_string_buf_loc;
|
||||
bool valid{false};
|
||||
int error_code{simdjson::UNITIALIZED};
|
||||
|
||||
private:
|
||||
// we don't want the default constructor to be called
|
||||
ParsedJson(const ParsedJson &p) =
|
||||
delete; // we don't want the default constructor to be called
|
||||
// we don't want the assignment to be called
|
||||
ParsedJson &operator=(const ParsedJson &o) = delete;
|
||||
};
|
||||
|
||||
// dump bits low to high
|
||||
|
|
|
@ -64,14 +64,14 @@ public:
|
|||
// within the string: get_string_length determines the true string length.
|
||||
inline const char *get_string() const {
|
||||
return reinterpret_cast<const char *>(
|
||||
pj->string_buf + (current_val & JSON_VALUE_MASK) + sizeof(uint32_t));
|
||||
pj->string_buf.get() + (current_val & JSON_VALUE_MASK) + sizeof(uint32_t));
|
||||
}
|
||||
|
||||
// return the length of the string in bytes
|
||||
inline uint32_t get_string_length() const {
|
||||
uint32_t answer;
|
||||
memcpy(&answer,
|
||||
reinterpret_cast<const char *>(pj->string_buf +
|
||||
reinterpret_cast<const char *>(pj->string_buf.get() +
|
||||
(current_val & JSON_VALUE_MASK)),
|
||||
sizeof(uint32_t));
|
||||
return answer;
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
set -e
|
||||
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
|
||||
|
||||
if [ -z "$CHECKPERF_REPOSITORY"]; then CHECKPERF_REPOSITORY=.; fi
|
||||
if [ -z "$CHECKPERF_REPOSITORY" ]; then CHECKPERF_REPOSITORY=.; fi
|
||||
|
||||
# Arguments: perfdiff.sh <branch> <test json files>
|
||||
if [ -z "$1" ]; then reference_branch="master"; else reference_branch=$1; shift; fi
|
||||
|
|
|
@ -371,7 +371,7 @@ int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &p
|
|||
return simdjson::CAPACITY;
|
||||
}
|
||||
utf8_checker utf8_checker{};
|
||||
json_structural_scanner scanner{pj.structural_indexes};
|
||||
json_structural_scanner scanner{pj.structural_indexes.get()};
|
||||
scanner.scan<STEP_SIZE>(buf, len, utf8_checker);
|
||||
|
||||
simdjson::ErrorValues error = scanner.detect_errors_on_eof();
|
||||
|
@ -379,7 +379,7 @@ int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &p
|
|||
return error;
|
||||
}
|
||||
|
||||
pj.n_structural_indexes = scanner.structural_indexes.tail - pj.structural_indexes;
|
||||
pj.n_structural_indexes = scanner.structural_indexes.tail - pj.structural_indexes.get();
|
||||
/* a valid JSON file cannot have zero structural indexes - we should have
|
||||
* found something */
|
||||
if (unlikely(pj.n_structural_indexes == 0u)) {
|
||||
|
|
|
@ -73,7 +73,7 @@ WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf,
|
|||
UNUSED size_t len, ParsedJson &pj,
|
||||
UNUSED const uint32_t depth,
|
||||
UNUSED uint32_t offset) {
|
||||
pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"');
|
||||
pj.write_tape(pj.current_string_buf_loc - pj.string_buf.get(), '"');
|
||||
const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */
|
||||
uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t);
|
||||
const uint8_t *const start_of_string = dst;
|
||||
|
|
|
@ -2,58 +2,6 @@
|
|||
#include "simdjson/jsonformatutils.h"
|
||||
|
||||
namespace simdjson {
|
||||
ParsedJson::ParsedJson()
|
||||
: structural_indexes(nullptr), tape(nullptr),
|
||||
containing_scope_offset(nullptr), ret_address(nullptr),
|
||||
string_buf(nullptr), current_string_buf_loc(nullptr) {}
|
||||
|
||||
ParsedJson::~ParsedJson() { deallocate(); }
|
||||
|
||||
ParsedJson::ParsedJson(ParsedJson &&p)
|
||||
: byte_capacity(p.byte_capacity), depth_capacity(p.depth_capacity),
|
||||
tape_capacity(p.tape_capacity), string_capacity(p.string_capacity),
|
||||
current_loc(p.current_loc), n_structural_indexes(p.n_structural_indexes),
|
||||
structural_indexes(p.structural_indexes), tape(p.tape),
|
||||
containing_scope_offset(p.containing_scope_offset),
|
||||
ret_address(p.ret_address), string_buf(p.string_buf),
|
||||
current_string_buf_loc(p.current_string_buf_loc), valid(p.valid) {
|
||||
p.structural_indexes = nullptr;
|
||||
p.tape = nullptr;
|
||||
p.containing_scope_offset = nullptr;
|
||||
p.ret_address = nullptr;
|
||||
p.string_buf = nullptr;
|
||||
p.current_string_buf_loc = nullptr;
|
||||
}
|
||||
|
||||
ParsedJson &ParsedJson::operator=(ParsedJson &&p) {
|
||||
byte_capacity = p.byte_capacity;
|
||||
p.byte_capacity = 0;
|
||||
depth_capacity = p.depth_capacity;
|
||||
p.depth_capacity = 0;
|
||||
tape_capacity = p.tape_capacity;
|
||||
p.tape_capacity = 0;
|
||||
string_capacity = p.string_capacity;
|
||||
p.string_capacity = 0;
|
||||
current_loc = p.current_loc;
|
||||
p.current_loc = 0;
|
||||
n_structural_indexes = p.n_structural_indexes;
|
||||
p.n_structural_indexes = 0;
|
||||
structural_indexes = p.structural_indexes;
|
||||
p.structural_indexes = nullptr;
|
||||
tape = p.tape;
|
||||
p.tape = nullptr;
|
||||
containing_scope_offset = p.containing_scope_offset;
|
||||
p.containing_scope_offset = nullptr;
|
||||
ret_address = p.ret_address;
|
||||
p.ret_address = nullptr;
|
||||
string_buf = p.string_buf;
|
||||
p.string_buf = nullptr;
|
||||
current_string_buf_loc = p.current_string_buf_loc;
|
||||
p.current_string_buf_loc = nullptr;
|
||||
valid = p.valid;
|
||||
p.valid = false;
|
||||
return *this;
|
||||
}
|
||||
|
||||
WARN_UNUSED
|
||||
bool ParsedJson::allocate_capacity(size_t len, size_t max_depth) {
|
||||
|
@ -74,7 +22,8 @@ bool ParsedJson::allocate_capacity(size_t len, size_t max_depth) {
|
|||
byte_capacity = 0; // will only set it to len after allocations are a success
|
||||
n_structural_indexes = 0;
|
||||
uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7;
|
||||
structural_indexes = new (std::nothrow) uint32_t[max_structures];
|
||||
structural_indexes.reset( new (std::nothrow) uint32_t[max_structures]);
|
||||
|
||||
// a pathological input like "[[[[..." would generate len tape elements, so
|
||||
// need a capacity of at least len + 1, but it is also possible to do
|
||||
// worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6"
|
||||
|
@ -84,24 +33,19 @@ bool ParsedJson::allocate_capacity(size_t len, size_t max_depth) {
|
|||
// a document with only zero-length strings... could have len/3 string
|
||||
// and we would need len/3 * 5 bytes on the string buffer
|
||||
size_t local_string_capacity = ROUNDUP_N(5 * len / 3 + 32, 64);
|
||||
string_buf = new (std::nothrow) uint8_t[local_string_capacity];
|
||||
tape = new (std::nothrow) uint64_t[local_tape_capacity];
|
||||
containing_scope_offset = new (std::nothrow) uint32_t[max_depth];
|
||||
string_buf.reset( new (std::nothrow) uint8_t[local_string_capacity]);
|
||||
tape.reset(new (std::nothrow) uint64_t[local_tape_capacity]);
|
||||
containing_scope_offset.reset(new (std::nothrow) uint32_t[max_depth]);
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
ret_address = new (std::nothrow) void *[max_depth];
|
||||
//ret_address = new (std::nothrow) void *[max_depth];
|
||||
ret_address.reset(new (std::nothrow) void *[max_depth]);
|
||||
#else
|
||||
ret_address = new (std::nothrow) char[max_depth];
|
||||
ret_address.reset(new (std::nothrow) char[max_depth]);
|
||||
#endif
|
||||
if ((string_buf == nullptr) || (tape == nullptr) ||
|
||||
(containing_scope_offset == nullptr) || (ret_address == nullptr) ||
|
||||
(structural_indexes == nullptr)) {
|
||||
if (!string_buf || !tape ||
|
||||
!containing_scope_offset || !ret_address ||
|
||||
!structural_indexes) {
|
||||
std::cerr << "Could not allocate memory" << std::endl;
|
||||
delete[] ret_address;
|
||||
delete[] containing_scope_offset;
|
||||
delete[] tape;
|
||||
delete[] string_buf;
|
||||
delete[] structural_indexes;
|
||||
|
||||
return false;
|
||||
}
|
||||
/*
|
||||
|
@ -131,16 +75,16 @@ void ParsedJson::deallocate() {
|
|||
depth_capacity = 0;
|
||||
tape_capacity = 0;
|
||||
string_capacity = 0;
|
||||
delete[] ret_address;
|
||||
delete[] containing_scope_offset;
|
||||
delete[] tape;
|
||||
delete[] string_buf;
|
||||
delete[] structural_indexes;
|
||||
ret_address.reset();
|
||||
containing_scope_offset.reset();
|
||||
tape.reset();
|
||||
string_buf.reset();
|
||||
structural_indexes.reset();
|
||||
valid = false;
|
||||
}
|
||||
|
||||
void ParsedJson::init() {
|
||||
current_string_buf_loc = string_buf;
|
||||
current_string_buf_loc = string_buf.get();
|
||||
current_loc = 0;
|
||||
valid = false;
|
||||
}
|
||||
|
@ -168,8 +112,8 @@ bool ParsedJson::print_json(std::ostream &os) const {
|
|||
return false;
|
||||
}
|
||||
tape_idx++;
|
||||
bool *in_object = new bool[depth_capacity];
|
||||
auto *in_object_idx = new size_t[depth_capacity];
|
||||
std::unique_ptr<bool[]> in_object(new bool[depth_capacity]);
|
||||
std::unique_ptr<size_t[]> in_object_idx(new size_t[depth_capacity]);
|
||||
int depth = 1; // only root at level 0
|
||||
in_object_idx[depth] = 0;
|
||||
in_object[depth] = false;
|
||||
|
@ -195,32 +139,26 @@ bool ParsedJson::print_json(std::ostream &os) const {
|
|||
switch (type) {
|
||||
case '"': // we have a string
|
||||
os << '"';
|
||||
memcpy(&string_length, string_buf + payload, sizeof(uint32_t));
|
||||
memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t));
|
||||
print_with_escapes(
|
||||
(const unsigned char *)(string_buf + payload + sizeof(uint32_t)),
|
||||
(const unsigned char *)(string_buf.get() + payload + sizeof(uint32_t)),
|
||||
os, string_length);
|
||||
os << '"';
|
||||
break;
|
||||
case 'l': // we have a long int
|
||||
if (tape_idx + 1 >= how_many) {
|
||||
delete[] in_object;
|
||||
delete[] in_object_idx;
|
||||
return false;
|
||||
}
|
||||
os << static_cast<int64_t>(tape[++tape_idx]);
|
||||
break;
|
||||
case 'u':
|
||||
if (tape_idx + 1 >= how_many) {
|
||||
delete[] in_object;
|
||||
delete[] in_object_idx;
|
||||
return false;
|
||||
}
|
||||
os << tape[++tape_idx];
|
||||
break;
|
||||
case 'd': // we have a double
|
||||
if (tape_idx + 1 >= how_many) {
|
||||
delete[] in_object;
|
||||
delete[] in_object_idx;
|
||||
return false;
|
||||
}
|
||||
double answer;
|
||||
|
@ -258,18 +196,12 @@ bool ParsedJson::print_json(std::ostream &os) const {
|
|||
break;
|
||||
case 'r': // we start and end with the root node
|
||||
fprintf(stderr, "should we be hitting the root node?\n");
|
||||
delete[] in_object;
|
||||
delete[] in_object_idx;
|
||||
return false;
|
||||
default:
|
||||
fprintf(stderr, "bug %c\n", type);
|
||||
delete[] in_object;
|
||||
delete[] in_object_idx;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
delete[] in_object;
|
||||
delete[] in_object_idx;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -301,9 +233,10 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) const {
|
|||
switch (type) {
|
||||
case '"': // we have a string
|
||||
os << "string \"";
|
||||
memcpy(&string_length, string_buf + payload, sizeof(uint32_t));
|
||||
memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t));
|
||||
print_with_escapes(
|
||||
(const unsigned char *)(string_buf + payload + sizeof(uint32_t)),
|
||||
(const unsigned char *)(string_buf.get() + payload + sizeof(uint32_t)),
|
||||
os,
|
||||
string_length);
|
||||
os << '"';
|
||||
os << '\n';
|
||||
|
|
Loading…
Reference in New Issue