Make #defines into simdjson::constants

This commit is contained in:
John Keiser 2020-03-06 16:46:41 -08:00
parent ac0899c043
commit a5afec1f94
11 changed files with 63 additions and 32 deletions

View File

@ -62,10 +62,10 @@ simdjson_compute_stats(const simdjson::padded_string &p) {
uint8_t type = (tape_val >> 56); uint8_t type = (tape_val >> 56);
size_t how_many = 0; size_t how_many = 0;
assert(type == 'r'); assert(type == 'r');
how_many = tape_val & JSON_VALUE_MASK; how_many = tape_val & simdjson::internal::JSON_VALUE_MASK;
for (; tape_idx < how_many; tape_idx++) { for (; tape_idx < how_many; tape_idx++) {
tape_val = pj.doc.tape[tape_idx]; tape_val = pj.doc.tape[tape_idx];
// uint64_t payload = tape_val & JSON_VALUE_MASK; // uint64_t payload = tape_val & simdjson::internal::JSON_VALUE_MASK;
type = (tape_val >> 56); type = (tape_val >> 56);
switch (type) { switch (type) {
case 'l': // we have a long int case 'l': // we have a long int

View File

@ -67,10 +67,10 @@ stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
uint8_t type = (tape_val >> 56); uint8_t type = (tape_val >> 56);
size_t how_many = 0; size_t how_many = 0;
assert(type == 'r'); assert(type == 'r');
how_many = tape_val & JSON_VALUE_MASK; how_many = tape_val & simdjson::internal::JSON_VALUE_MASK;
for (; tape_idx < how_many; tape_idx++) { for (; tape_idx < how_many; tape_idx++) {
tape_val = pj.doc.tape[tape_idx]; tape_val = pj.doc.tape[tape_idx];
// uint64_t payload = tape_val & JSON_VALUE_MASK; // uint64_t payload = tape_val & simdjson::internal::JSON_VALUE_MASK;
type = (tape_val >> 56); type = (tape_val >> 56);
switch (type) { switch (type) {
case 'l': // we have a long int case 'l': // we have a long int

View File

@ -4,14 +4,29 @@
#include <cassert> #include <cassert>
#include "simdjson/portability.h" #include "simdjson/portability.h"
// we support documents up to 4GB namespace simdjson {
#define SIMDJSON_MAXSIZE_BYTES 0xFFFFFFFF
// the input buf should be readable up to buf + SIMDJSON_PADDING /** The maximum document size supported by simdjson. */
// this is a stopgap; there should be a better description of the constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF;
// main loop and its behavior that abstracts over this
// See https://github.com/lemire/simdjson/issues/174 /**
#define SIMDJSON_PADDING 32 * The amount of padding needed in a buffer to parse JSON.
*
* The input buffer should be readable up to buf + SIMDJSON_PADDING.
* This is a stopgap; there should be a better description of the
* main loop and its behavior that abstracts over this.
* See https://github.com/lemire/simdjson/issues/174
*/
constexpr size_t SIMDJSON_PADDING = 32;
/**
* By default, simdjson supports this many nested objects and arrays.
*
* This is the default for document::parser::max_depth().
*/
constexpr size_t DEFAULT_MAX_DEPTH = 1024;
} // namespace simdjson
#if defined(__GNUC__) #if defined(__GNUC__)
// Marks a block with a name so that MCA analysis can see it. // Marks a block with a name so that MCA analysis can see it.

View File

@ -9,11 +9,12 @@
#include "simdjson/simdjson.h" #include "simdjson/simdjson.h"
#include "simdjson/padded_string.h" #include "simdjson/padded_string.h"
#define JSON_VALUE_MASK 0x00FFFFFFFFFFFFFF
#define DEFAULT_MAX_DEPTH 1024 // a JSON document with a depth exceeding 1024 is probably de facto invalid
namespace simdjson { namespace simdjson {
namespace internal {
constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF;
}
template<size_t max_depth> class document_iterator; template<size_t max_depth> class document_iterator;
/** /**

View File

@ -67,7 +67,7 @@ public:
// within the string: get_string_length determines the true string length. // within the string: get_string_length determines the true string length.
inline const char *get_string() const { inline const char *get_string() const {
return reinterpret_cast<const char *>( return reinterpret_cast<const char *>(
doc.string_buf.get() + (current_val & JSON_VALUE_MASK) + sizeof(uint32_t)); doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK) + sizeof(uint32_t));
} }
// return the length of the string in bytes // return the length of the string in bytes
@ -75,7 +75,7 @@ public:
uint32_t answer; uint32_t answer;
memcpy(&answer, memcpy(&answer,
reinterpret_cast<const char *>(doc.string_buf.get() + reinterpret_cast<const char *>(doc.string_buf.get() +
(current_val & JSON_VALUE_MASK)), (current_val & internal::JSON_VALUE_MASK)),
sizeof(uint32_t)); sizeof(uint32_t));
return answer; return answer;
} }

View File

@ -247,7 +247,7 @@ inline bool document::print_json(std::ostream &os, size_t max_depth) const noexc
uint8_t type = (tape_val >> 56); uint8_t type = (tape_val >> 56);
size_t how_many = 0; size_t how_many = 0;
if (type == 'r') { if (type == 'r') {
how_many = tape_val & JSON_VALUE_MASK; how_many = tape_val & internal::JSON_VALUE_MASK;
} else { } else {
// Error: no starting root node? // Error: no starting root node?
return false; return false;
@ -260,7 +260,7 @@ inline bool document::print_json(std::ostream &os, size_t max_depth) const noexc
in_object[depth] = false; in_object[depth] = false;
for (; tape_idx < how_many; tape_idx++) { for (; tape_idx < how_many; tape_idx++) {
tape_val = tape[tape_idx]; tape_val = tape[tape_idx];
uint64_t payload = tape_val & JSON_VALUE_MASK; uint64_t payload = tape_val & internal::JSON_VALUE_MASK;
type = (tape_val >> 56); type = (tape_val >> 56);
if (!in_object[depth]) { if (!in_object[depth]) {
if ((in_object_idx[depth] > 0) && (type != ']')) { if ((in_object_idx[depth] > 0) && (type != ']')) {
@ -355,7 +355,7 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept {
tape_idx++; tape_idx++;
size_t how_many = 0; size_t how_many = 0;
if (type == 'r') { if (type == 'r') {
how_many = tape_val & JSON_VALUE_MASK; how_many = tape_val & internal::JSON_VALUE_MASK;
} else { } else {
// Error: no starting root node? // Error: no starting root node?
return false; return false;
@ -365,7 +365,7 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept {
for (; tape_idx < how_many; tape_idx++) { for (; tape_idx < how_many; tape_idx++) {
os << tape_idx << " : "; os << tape_idx << " : ";
tape_val = tape[tape_idx]; tape_val = tape[tape_idx];
payload = tape_val & JSON_VALUE_MASK; payload = tape_val & internal::JSON_VALUE_MASK;
type = (tape_val >> 56); type = (tape_val >> 56);
switch (type) { switch (type) {
case '"': // we have a string case '"': // we have a string
@ -432,7 +432,7 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept {
} }
} }
tape_val = tape[tape_idx]; tape_val = tape[tape_idx];
payload = tape_val & JSON_VALUE_MASK; payload = tape_val & internal::JSON_VALUE_MASK;
type = (tape_val >> 56); type = (tape_val >> 56);
os << tape_idx << " : " << type << "\t// pointing to " << payload os << tape_idx << " : " << type << "\t// pointing to " << payload
<< " (start root)\n"; << " (start root)\n";
@ -685,7 +685,7 @@ really_inline document::tape_type document::tape_ref::type() const noexcept {
return static_cast<tape_type>(doc->tape[json_index] >> 56); return static_cast<tape_type>(doc->tape[json_index] >> 56);
} }
really_inline uint64_t document::tape_ref::tape_value() const noexcept { really_inline uint64_t document::tape_ref::tape_value() const noexcept {
return doc->tape[json_index] & JSON_VALUE_MASK; return doc->tape[json_index] & internal::JSON_VALUE_MASK;
} }
template<typename T> template<typename T>
really_inline T document::tape_ref::next_tape_value() const noexcept { really_inline T document::tape_ref::next_tape_value() const noexcept {

View File

@ -150,7 +150,7 @@ template <size_t max_depth> bool document_iterator<max_depth>::prev() {
oldnpos = npos; oldnpos = npos;
if ((current_type == '[') || (current_type == '{')) { if ((current_type == '[') || (current_type == '{')) {
// we need to jump // we need to jump
npos = (current_val & JSON_VALUE_MASK); npos = (current_val & internal::JSON_VALUE_MASK);
} else { } else {
npos = npos + ((current_type == 'd' || current_type == 'l') ? 2 : 1); npos = npos + ((current_type == 'd' || current_type == 'l') ? 2 : 1);
} }
@ -179,7 +179,7 @@ template <size_t max_depth> bool document_iterator<max_depth>::down() {
return false; return false;
} }
if ((current_type == '[') || (current_type == '{')) { if ((current_type == '[') || (current_type == '{')) {
size_t npos = (current_val & JSON_VALUE_MASK); size_t npos = (current_val & internal::JSON_VALUE_MASK);
if (npos == location + 2) { if (npos == location + 2) {
return false; // we have an empty scope return false; // we have an empty scope
} }
@ -206,7 +206,7 @@ template <size_t max_depth> bool document_iterator<max_depth>::next() {
size_t npos; size_t npos;
if ((current_type == '[') || (current_type == '{')) { if ((current_type == '[') || (current_type == '{')) {
// we need to jump // we need to jump
npos = (current_val & JSON_VALUE_MASK); npos = (current_val & internal::JSON_VALUE_MASK);
} else { } else {
npos = location + (is_number() ? 2 : 1); npos = location + (is_number() ? 2 : 1);
} }
@ -228,7 +228,7 @@ document_iterator<max_depth>::document_iterator(const document &doc_) noexcept
current_val = doc.tape[location++]; current_val = doc.tape[location++];
current_type = (current_val >> 56); current_type = (current_val >> 56);
depth_index[0].scope_type = current_type; depth_index[0].scope_type = current_type;
tape_length = current_val & JSON_VALUE_MASK; tape_length = current_val & internal::JSON_VALUE_MASK;
if (location < tape_length) { if (location < tape_length) {
// If we make it here, then depth_capacity must >=2, but the compiler // If we make it here, then depth_capacity must >=2, but the compiler
// may not know this. // may not know this.
@ -456,7 +456,7 @@ bool document_iterator<max_depth>::relative_move_to(const char *pointer,
size_t npos; size_t npos;
if ((current_type == '[') || (current_type == '{')) { if ((current_type == '[') || (current_type == '{')) {
// we need to jump // we need to jump
npos = (current_val & JSON_VALUE_MASK); npos = (current_val & internal::JSON_VALUE_MASK);
} else { } else {
npos = npos =
location + ((current_type == 'd' || current_type == 'l') ? 2 : 1); location + ((current_type == 'd' || current_type == 'l') ? 2 : 1);

View File

@ -19,4 +19,4 @@ inline padded_string get_corpus(const std::string &filename) {
} // namespace simdjson } // namespace simdjson
#endif #endif // SIMDJSON_JSONIOUTIL_H

View File

@ -26,5 +26,7 @@ static inline size_t json_minify(const std::string_view &p, char *out) {
static inline size_t json_minify(const padded_string &p, char *out) { static inline size_t json_minify(const padded_string &p, char *out) {
return json_minify(p.data(), p.size(), out); return json_minify(p.data(), p.size(), out);
} }
} // namespace simdjson } // namespace simdjson
#endif
#endif // SIMDJSON_JSONMINIFIER_H

View File

@ -2,12 +2,25 @@
// do not change by hand // do not change by hand
#ifndef SIMDJSON_SIMDJSON_VERSION_H #ifndef SIMDJSON_SIMDJSON_VERSION_H
#define SIMDJSON_SIMDJSON_VERSION_H #define SIMDJSON_SIMDJSON_VERSION_H
/** The version of simdjson being used (major.minor.revision) */
#define SIMDJSON_VERSION 0.2.1 #define SIMDJSON_VERSION 0.2.1
namespace simdjson { namespace simdjson {
enum { enum {
/**
* The major version (MAJOR.minor.revision) of simdjson being used.
*/
SIMDJSON_VERSION_MAJOR = 0, SIMDJSON_VERSION_MAJOR = 0,
/**
* The minor version (major.MINOR.revision) of simdjson being used.
*/
SIMDJSON_VERSION_MINOR = 2, SIMDJSON_VERSION_MINOR = 2,
/**
* The revision (major.minor.REVISION) of simdjson being used.
*/
SIMDJSON_VERSION_REVISION = 1 SIMDJSON_VERSION_REVISION = 1
}; };
} } // namespace simdjson
#endif // SIMDJSON_SIMDJSON_VERSION_H #endif // SIMDJSON_SIMDJSON_VERSION_H

View File

@ -63,10 +63,10 @@ stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
uint8_t type = (tape_val >> 56); uint8_t type = (tape_val >> 56);
size_t how_many = 0; size_t how_many = 0;
assert(type == 'r'); assert(type == 'r');
how_many = tape_val & JSON_VALUE_MASK; how_many = tape_val & simdjson::internal::JSON_VALUE_MASK;
for (; tape_idx < how_many; tape_idx++) { for (; tape_idx < how_many; tape_idx++) {
tape_val = pj.doc.tape[tape_idx]; tape_val = pj.doc.tape[tape_idx];
// uint64_t payload = tape_val & JSON_VALUE_MASK; // uint64_t payload = tape_val & simdjson::internal::JSON_VALUE_MASK;
type = (tape_val >> 56); type = (tape_val >> 56);
switch (type) { switch (type) {
case 'l': // we have a long int case 'l': // we have a long int