Improve documentation on padding
- Improves and clarifies the documentation on padding. - Use std:: prefix for memcpy, strlen etc. Related to issues #1175 and #1178
This commit is contained in:
parent
19cb5d57db
commit
f410213003
|
@ -8,7 +8,7 @@
|
||||||
#include <unistd.h> // for syscall
|
#include <unistd.h> // for syscall
|
||||||
|
|
||||||
#include <cerrno> // for errno
|
#include <cerrno> // for errno
|
||||||
#include <cstring> // for memset
|
#include <cstring> // for std::memset
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
@ -24,7 +24,7 @@ template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit LinuxEvents(std::vector<int> config_vec) : fd(0), working(true) {
|
explicit LinuxEvents(std::vector<int> config_vec) : fd(0), working(true) {
|
||||||
memset(&attribs, 0, sizeof(attribs));
|
std::memset(&attribs, 0, sizeof(attribs));
|
||||||
attribs.type = TYPE;
|
attribs.type = TYPE;
|
||||||
attribs.size = sizeof(attribs);
|
attribs.size = sizeof(attribs);
|
||||||
attribs.disabled = 1;
|
attribs.disabled = 1;
|
||||||
|
|
|
@ -296,8 +296,8 @@ simdjson_really_inline void sax_tweet_reader_visitor::field_lookup::neg(const ch
|
||||||
}
|
}
|
||||||
|
|
||||||
sax_tweet_reader_visitor::field_lookup::field_lookup() {
|
sax_tweet_reader_visitor::field_lookup::field_lookup() {
|
||||||
add("\"statuses\"", strlen("\"statuses\""), containers::top_object, field_type::array, 0); // { "statuses": [...]
|
add("\"statuses\"", std::strlen("\"statuses\""), containers::top_object, field_type::array, 0); // { "statuses": [...]
|
||||||
#define TWEET_FIELD(KEY, TYPE) add("\"" #KEY "\"", strlen("\"" #KEY "\""), containers::tweet, TYPE, offsetof(tweet, KEY));
|
#define TWEET_FIELD(KEY, TYPE) add("\"" #KEY "\"", std::strlen("\"" #KEY "\""), containers::tweet, TYPE, offsetof(tweet, KEY));
|
||||||
TWEET_FIELD(id, field_type::unsigned_integer);
|
TWEET_FIELD(id, field_type::unsigned_integer);
|
||||||
TWEET_FIELD(in_reply_to_status_id, field_type::nullable_unsigned_integer);
|
TWEET_FIELD(in_reply_to_status_id, field_type::nullable_unsigned_integer);
|
||||||
TWEET_FIELD(retweet_count, field_type::unsigned_integer);
|
TWEET_FIELD(retweet_count, field_type::unsigned_integer);
|
||||||
|
@ -306,7 +306,7 @@ sax_tweet_reader_visitor::field_lookup::field_lookup() {
|
||||||
TWEET_FIELD(created_at, field_type::string);
|
TWEET_FIELD(created_at, field_type::string);
|
||||||
TWEET_FIELD(user, field_type::object)
|
TWEET_FIELD(user, field_type::object)
|
||||||
#undef TWEET_FIELD
|
#undef TWEET_FIELD
|
||||||
#define USER_FIELD(KEY, TYPE) add("\"" #KEY "\"", strlen("\"" #KEY "\""), containers::user, TYPE, offsetof(tweet, user)+offsetof(twitter_user, KEY));
|
#define USER_FIELD(KEY, TYPE) add("\"" #KEY "\"", std::strlen("\"" #KEY "\""), containers::user, TYPE, offsetof(tweet, user)+offsetof(twitter_user, KEY));
|
||||||
USER_FIELD(id, field_type::unsigned_integer);
|
USER_FIELD(id, field_type::unsigned_integer);
|
||||||
USER_FIELD(screen_name, field_type::string);
|
USER_FIELD(screen_name, field_type::string);
|
||||||
#undef USER_FIELD
|
#undef USER_FIELD
|
||||||
|
|
|
@ -278,7 +278,7 @@ In some cases, you may have valid JSON strings that you do not wish to parse but
|
||||||
// Starts with a valid JSON document as a string.
|
// Starts with a valid JSON document as a string.
|
||||||
// It does not have to be null-terminated.
|
// It does not have to be null-terminated.
|
||||||
const char * some_string = "[ 1, 2, 3, 4] ";
|
const char * some_string = "[ 1, 2, 3, 4] ";
|
||||||
size_t length = strlen(some_string);
|
size_t length = std::strlen(some_string);
|
||||||
// Create a buffer to receive the minified string. Make sure that there is enough room (length bytes).
|
// Create a buffer to receive the minified string. Make sure that there is enough room (length bytes).
|
||||||
std::unique_ptr<char[]> buffer{new char[length]};
|
std::unique_ptr<char[]> buffer{new char[length]};
|
||||||
size_t new_length{}; // It will receive the minified length.
|
size_t new_length{}; // It will receive the minified length.
|
||||||
|
@ -296,7 +296,7 @@ The simdjson library has fast functions to validate UTF-8 strings. They are many
|
||||||
|
|
||||||
```C++
|
```C++
|
||||||
const char * some_string = "[ 1, 2, 3, 4] ";
|
const char * some_string = "[ 1, 2, 3, 4] ";
|
||||||
size_t length = strlen(some_string);
|
size_t length = std::strlen(some_string);
|
||||||
bool is_ok = simdjson::validate_utf8(some_string, length);
|
bool is_ok = simdjson::validate_utf8(some_string, length);
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -260,7 +260,7 @@ In some cases, you may have valid JSON strings that you do not wish to parse but
|
||||||
// Starts with a valid JSON document as a string.
|
// Starts with a valid JSON document as a string.
|
||||||
// It does not have to be null-terminated.
|
// It does not have to be null-terminated.
|
||||||
const char * some_string = "[ 1, 2, 3, 4] ";
|
const char * some_string = "[ 1, 2, 3, 4] ";
|
||||||
size_t length = strlen(some_string);
|
size_t length = std::strlen(some_string);
|
||||||
// Create a buffer to receive the minified string. Make sure that there is enough room (length bytes).
|
// Create a buffer to receive the minified string. Make sure that there is enough room (length bytes).
|
||||||
std::unique_ptr<char[]> buffer{new char[length]};
|
std::unique_ptr<char[]> buffer{new char[length]};
|
||||||
size_t new_length{}; // It will receive the minified length.
|
size_t new_length{}; // It will receive the minified length.
|
||||||
|
@ -278,7 +278,7 @@ The simdjson library has fast functions to validate UTF-8 strings. They are many
|
||||||
|
|
||||||
```
|
```
|
||||||
const char * some_string = "[ 1, 2, 3, 4] ";
|
const char * some_string = "[ 1, 2, 3, 4] ";
|
||||||
size_t length = strlen(some_string);
|
size_t length = std::strlen(some_string);
|
||||||
bool is_ok = simdjson::validate_utf8(some_string, length);
|
bool is_ok = simdjson::validate_utf8(some_string, length);
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,8 @@ are still some scenarios where tuning can enhance performance.
|
||||||
* [Visual Studio](#visual-studio)
|
* [Visual Studio](#visual-studio)
|
||||||
* [Downclocking](#downclocking)
|
* [Downclocking](#downclocking)
|
||||||
* [Best Use of the DOM API](#best-use-of-the-dom-api)
|
* [Best Use of the DOM API](#best-use-of-the-dom-api)
|
||||||
|
* [Padding and Temporary Copies](#padding-and-temporary-copies)
|
||||||
|
|
||||||
|
|
||||||
Reusing the parser for maximum efficiency
|
Reusing the parser for maximum efficiency
|
||||||
-----------------------------------------
|
-----------------------------------------
|
||||||
|
@ -174,3 +176,25 @@ Best Use of the DOM API
|
||||||
|
|
||||||
The simdjson API provides access to the JSON DOM (document-object-model) content as a tree of `dom::element` instances, each representing an object, an array or an atomic type (null, true, false, number). These `dom::element` instances are lightweight objects (e.g., spanning 16 bytes) and it might be advantageous to pass them by value, as opposed to passing them by reference or by pointer.
|
The simdjson API provides access to the JSON DOM (document-object-model) content as a tree of `dom::element` instances, each representing an object, an array or an atomic type (null, true, false, number). These `dom::element` instances are lightweight objects (e.g., spanning 16 bytes) and it might be advantageous to pass them by value, as opposed to passing them by reference or by pointer.
|
||||||
|
|
||||||
|
Padding and Temporary Copies
|
||||||
|
--------------
|
||||||
|
|
||||||
|
The simdjson function `parser.parse` reads data from a padded buffer, containing SIMDJSON_PADDING extra bytes added at the end.
|
||||||
|
If you are passing a `padded_string` to `parser.parse` or loading the JSON directly from
|
||||||
|
disk (`parser.load`), padding is automatically handled.
|
||||||
|
When calling `parser.parse` on a pointer (e.g., `parser.parse(mystring, mylength)`) a temporary copy is made by default with adequate padding and you, again, do not need to be concerned with padding.
|
||||||
|
|
||||||
|
Some users may not be able to use our `padded_string` class or to load the data directly from disk (`parser.load`). They may need to pass data pointers to the library. If these users wish to avoid temporary copies and corresponding temporary memory allocations, they may want to call `parser.parse` with the `realloc_if_needed` parameter set to false (e.g., `parser.parse(mystring, mylength, false)`). In such cases, they need to ensure that there are at least SIMDJSON_PADDING extra bytes at the end that can be safely accessed and read. They do not need to initialize the padded bytes to any value in particular. The following example is safe:
|
||||||
|
|
||||||
|
|
||||||
|
```C++
|
||||||
|
const char *json = R"({"key":"value"})";
|
||||||
|
const size_t json_len = std::strlen(json);
|
||||||
|
std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]};
|
||||||
|
std::memcpy(padded_json_copy.get(), json, json_len);
|
||||||
|
std::memset(padded_json_copy.get() + json_len, 0, SIMDJSON_PADDING);
|
||||||
|
simdjson::dom::parser parser;
|
||||||
|
simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false);
|
||||||
|
```
|
||||||
|
|
||||||
|
Setting the `realloc_if_needed` parameter to false in this manner may lead to better performance, but it requires the user to take on more responsibility: the simdjson library cannot verify that the input buffer was padded.
|
|
@ -66,7 +66,7 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case '"': // we have a string
|
case '"': // we have a string
|
||||||
os << "string \"";
|
os << "string \"";
|
||||||
memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t));
|
std::memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t));
|
||||||
os << internal::escape_json_string(std::string_view(
|
os << internal::escape_json_string(std::string_view(
|
||||||
(const char *)(string_buf.get() + payload + sizeof(uint32_t)),
|
(const char *)(string_buf.get() + payload + sizeof(uint32_t)),
|
||||||
string_length
|
string_length
|
||||||
|
@ -92,7 +92,7 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
double answer;
|
double answer;
|
||||||
memcpy(&answer, &tape[++tape_idx], sizeof(answer));
|
std::memcpy(&answer, &tape[++tape_idx], sizeof(answer));
|
||||||
os << answer << '\n';
|
os << answer << '\n';
|
||||||
break;
|
break;
|
||||||
case 'n': // we have a null
|
case 'n': // we have a null
|
||||||
|
|
|
@ -252,7 +252,7 @@ dom::parser::Iterator::Iterator(
|
||||||
current_val(o.current_val)
|
current_val(o.current_val)
|
||||||
{
|
{
|
||||||
depth_index = new scopeindex_t[max_depth+1];
|
depth_index = new scopeindex_t[max_depth+1];
|
||||||
memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0]));
|
std::memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
dom::parser::Iterator::~Iterator() noexcept {
|
dom::parser::Iterator::~Iterator() noexcept {
|
||||||
|
|
|
@ -78,7 +78,7 @@ public:
|
||||||
// return the length of the string in bytes
|
// return the length of the string in bytes
|
||||||
inline uint32_t get_string_length() const {
|
inline uint32_t get_string_length() const {
|
||||||
uint32_t answer;
|
uint32_t answer;
|
||||||
memcpy(&answer,
|
std::memcpy(&answer,
|
||||||
reinterpret_cast<const char *>(doc.string_buf.get() +
|
reinterpret_cast<const char *>(doc.string_buf.get() +
|
||||||
(current_val & internal::JSON_VALUE_MASK)),
|
(current_val & internal::JSON_VALUE_MASK)),
|
||||||
sizeof(uint32_t));
|
sizeof(uint32_t));
|
||||||
|
@ -93,7 +93,7 @@ public:
|
||||||
// case of error
|
// case of error
|
||||||
}
|
}
|
||||||
double answer;
|
double answer;
|
||||||
memcpy(&answer, &doc.tape[location + 1], sizeof(answer));
|
std::memcpy(&answer, &doc.tape[location + 1], sizeof(answer));
|
||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -98,7 +98,7 @@ inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bo
|
||||||
if (realloc_if_needed) {
|
if (realloc_if_needed) {
|
||||||
tmp_buf.reset((uint8_t *)internal::allocate_padded_buffer(len));
|
tmp_buf.reset((uint8_t *)internal::allocate_padded_buffer(len));
|
||||||
if (tmp_buf.get() == nullptr) { return MEMALLOC; }
|
if (tmp_buf.get() == nullptr) { return MEMALLOC; }
|
||||||
memcpy((void *)tmp_buf.get(), buf, len);
|
std::memcpy((void *)tmp_buf.get(), buf, len);
|
||||||
}
|
}
|
||||||
_error = implementation->parse(realloc_if_needed ? tmp_buf.get() : buf, len, doc);
|
_error = implementation->parse(realloc_if_needed ? tmp_buf.get() : buf, len, doc);
|
||||||
if (_error) { return _error; }
|
if (_error) { return _error; }
|
||||||
|
|
|
@ -114,8 +114,30 @@ public:
|
||||||
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
||||||
* those bytes are initialized to, as long as they are allocated.
|
* those bytes are initialized to, as long as they are allocated.
|
||||||
*
|
*
|
||||||
* If realloc_if_needed is true, it is assumed that the buffer does *not* have enough padding,
|
* If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding,
|
||||||
* and it is copied into an enlarged temporary buffer before parsing.
|
* and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe:
|
||||||
|
*
|
||||||
|
* const char *json = R"({"key":"value"})";
|
||||||
|
* const size_t json_len = std::strlen(json);
|
||||||
|
* simdjson::dom::parser parser;
|
||||||
|
* simdjson::dom::element element = parser.parse(json, json_len);
|
||||||
|
*
|
||||||
|
* If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)),
|
||||||
|
* you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end.
|
||||||
|
* The benefit of setting realloc_if_needed to false is that you avoid a temporary
|
||||||
|
* memory allocation and a copy.
|
||||||
|
*
|
||||||
|
* The padded bytes may be read. It is not important how you initialize
|
||||||
|
* these bytes though we recommend a sensible default like null character values or spaces.
|
||||||
|
* For example, the following low-level code is safe:
|
||||||
|
*
|
||||||
|
* const char *json = R"({"key":"value"})";
|
||||||
|
* const size_t json_len = std::strlen(json);
|
||||||
|
* std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]};
|
||||||
|
* std::memcpy(padded_json_copy.get(), json, json_len);
|
||||||
|
* std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING);
|
||||||
|
* simdjson::dom::parser parser;
|
||||||
|
* simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false);
|
||||||
*
|
*
|
||||||
* ### Parser Capacity
|
* ### Parser Capacity
|
||||||
*
|
*
|
||||||
|
|
|
@ -43,7 +43,7 @@ enum error_code {
|
||||||
*
|
*
|
||||||
* dom::parser parser;
|
* dom::parser parser;
|
||||||
* dom::element doc;
|
* dom::element doc;
|
||||||
* auto error = parser.parse("foo").get(doc);
|
* auto error = parser.parse("foo",3).get(doc);
|
||||||
* if (error) { printf("Error: %s\n", error_message(error)); }
|
* if (error) { printf("Error: %s\n", error_message(error)); }
|
||||||
*
|
*
|
||||||
* @return The error message.
|
* @return The error message.
|
||||||
|
|
|
@ -81,14 +81,14 @@ simdjson_really_inline T tape_ref::next_tape_value() const noexcept {
|
||||||
// It is not generally safe. It is safer, and often faster to rely
|
// It is not generally safe. It is safer, and often faster to rely
|
||||||
// on memcpy. Yes, it is uglier, but it is also encapsulated.
|
// on memcpy. Yes, it is uglier, but it is also encapsulated.
|
||||||
T x;
|
T x;
|
||||||
memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t));
|
std::memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t));
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline uint32_t internal::tape_ref::get_string_length() const noexcept {
|
simdjson_really_inline uint32_t internal::tape_ref::get_string_length() const noexcept {
|
||||||
size_t string_buf_index = size_t(tape_value());
|
size_t string_buf_index = size_t(tape_value());
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
|
std::memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,7 @@ inline char *allocate_padded_buffer(size_t length) noexcept {
|
||||||
// We write zeroes in the padded region to avoid having uninitialized
|
// We write zeroes in the padded region to avoid having uninitialized
|
||||||
// garbage. If nothing else, garbage getting read might trigger a
|
// garbage. If nothing else, garbage getting read might trigger a
|
||||||
// warning in a memory checker.
|
// warning in a memory checker.
|
||||||
memset(padded_buffer + length, 0, totalpaddedlength - length);
|
std::memset(padded_buffer + length, 0, totalpaddedlength - length);
|
||||||
return padded_buffer;
|
return padded_buffer;
|
||||||
} // allocate_padded_buffer()
|
} // allocate_padded_buffer()
|
||||||
|
|
||||||
|
@ -43,7 +43,7 @@ inline padded_string::padded_string(size_t length) noexcept
|
||||||
inline padded_string::padded_string(const char *data, size_t length) noexcept
|
inline padded_string::padded_string(const char *data, size_t length) noexcept
|
||||||
: viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) {
|
: viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) {
|
||||||
if ((data != nullptr) and (data_ptr != nullptr)) {
|
if ((data != nullptr) and (data_ptr != nullptr)) {
|
||||||
memcpy(data_ptr, data, length);
|
std::memcpy(data_ptr, data, length);
|
||||||
data_ptr[length] = '\0'; // easier when you need a c_str
|
data_ptr[length] = '\0'; // easier when you need a c_str
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -51,7 +51,7 @@ inline padded_string::padded_string(const char *data, size_t length) noexcept
|
||||||
inline padded_string::padded_string(const std::string & str_ ) noexcept
|
inline padded_string::padded_string(const std::string & str_ ) noexcept
|
||||||
: viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) {
|
: viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) {
|
||||||
if (data_ptr != nullptr) {
|
if (data_ptr != nullptr) {
|
||||||
memcpy(data_ptr, str_.data(), str_.size());
|
std::memcpy(data_ptr, str_.data(), str_.size());
|
||||||
data_ptr[str_.size()] = '\0'; // easier when you need a c_str
|
data_ptr[str_.size()] = '\0'; // easier when you need a c_str
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -59,7 +59,7 @@ inline padded_string::padded_string(const std::string & str_ ) noexcept
|
||||||
inline padded_string::padded_string(std::string_view sv_) noexcept
|
inline padded_string::padded_string(std::string_view sv_) noexcept
|
||||||
: viable_size(sv_.size()), data_ptr(internal::allocate_padded_buffer(sv_.size())) {
|
: viable_size(sv_.size()), data_ptr(internal::allocate_padded_buffer(sv_.size())) {
|
||||||
if (data_ptr != nullptr) {
|
if (data_ptr != nullptr) {
|
||||||
memcpy(data_ptr, sv_.data(), sv_.size());
|
std::memcpy(data_ptr, sv_.data(), sv_.size());
|
||||||
data_ptr[sv_.size()] = '\0'; // easier when you need a c_str
|
data_ptr[sv_.size()] = '\0'; // easier when you need a c_str
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -76,8 +76,8 @@ simdjson_really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block()
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
simdjson_really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
|
simdjson_really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
|
||||||
if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers
|
if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers
|
||||||
memset(dst, 0x20, STEP_SIZE); // memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
|
std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
|
||||||
memcpy(dst, buf + idx, len - idx);
|
std::memcpy(dst, buf + idx, len - idx);
|
||||||
return len - idx;
|
return len - idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -179,8 +179,8 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_root_
|
||||||
//
|
//
|
||||||
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING));
|
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING));
|
||||||
if (copy == nullptr) { return MEMALLOC; }
|
if (copy == nullptr) { return MEMALLOC; }
|
||||||
memcpy(copy, value, iter.remaining_len());
|
std::memcpy(copy, value, iter.remaining_len());
|
||||||
memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING);
|
std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING);
|
||||||
error_code error = visit_number(iter, copy);
|
error_code error = visit_number(iter, copy);
|
||||||
free(copy);
|
free(copy);
|
||||||
return error;
|
return error;
|
||||||
|
|
|
@ -255,10 +255,10 @@ namespace parse_api_tests {
|
||||||
uint64_t count = 0;
|
uint64_t count = 0;
|
||||||
constexpr const int BATCH_SIZE = 128;
|
constexpr const int BATCH_SIZE = 128;
|
||||||
uint8_t empty_batches_ndjson[BATCH_SIZE*16+SIMDJSON_PADDING];
|
uint8_t empty_batches_ndjson[BATCH_SIZE*16+SIMDJSON_PADDING];
|
||||||
memset(&empty_batches_ndjson[0], ' ', BATCH_SIZE*16+SIMDJSON_PADDING);
|
std::memset(&empty_batches_ndjson[0], ' ', BATCH_SIZE*16+SIMDJSON_PADDING);
|
||||||
memcpy(&empty_batches_ndjson[BATCH_SIZE*3+2], "1", 1);
|
std::memcpy(&empty_batches_ndjson[BATCH_SIZE*3+2], "1", 1);
|
||||||
memcpy(&empty_batches_ndjson[BATCH_SIZE*10+4], "2", 1);
|
std::memcpy(&empty_batches_ndjson[BATCH_SIZE*10+4], "2", 1);
|
||||||
memcpy(&empty_batches_ndjson[BATCH_SIZE*11+6], "3", 1);
|
std::memcpy(&empty_batches_ndjson[BATCH_SIZE*11+6], "3", 1);
|
||||||
simdjson::dom::document_stream stream;
|
simdjson::dom::document_stream stream;
|
||||||
ASSERT_SUCCESS( parser.parse_many(empty_batches_ndjson, BATCH_SIZE*16).get(stream) );
|
ASSERT_SUCCESS( parser.parse_many(empty_batches_ndjson, BATCH_SIZE*16).get(stream) );
|
||||||
for (auto doc : stream) {
|
for (auto doc : stream) {
|
||||||
|
|
|
@ -150,7 +150,7 @@ namespace adversarial {
|
||||||
bool number_overrun_at_root() {
|
bool number_overrun_at_root() {
|
||||||
TEST_START();
|
TEST_START();
|
||||||
constexpr const char *json = "1" PADDING_FILLED_WITH_NUMBERS ",";
|
constexpr const char *json = "1" PADDING_FILLED_WITH_NUMBERS ",";
|
||||||
constexpr size_t len = 1; // strlen("1");
|
constexpr size_t len = 1; // std::strlen("1");
|
||||||
|
|
||||||
dom::parser parser;
|
dom::parser parser;
|
||||||
uint64_t foo;
|
uint64_t foo;
|
||||||
|
@ -161,7 +161,7 @@ namespace adversarial {
|
||||||
bool number_overrun_in_array() {
|
bool number_overrun_in_array() {
|
||||||
TEST_START();
|
TEST_START();
|
||||||
constexpr const char *json = "[1" PADDING_FILLED_WITH_NUMBERS "]";
|
constexpr const char *json = "[1" PADDING_FILLED_WITH_NUMBERS "]";
|
||||||
constexpr size_t len = 2; // strlen("[1");
|
constexpr size_t len = 2; // std::strlen("[1");
|
||||||
|
|
||||||
dom::parser parser;
|
dom::parser parser;
|
||||||
uint64_t foo;
|
uint64_t foo;
|
||||||
|
@ -171,7 +171,7 @@ namespace adversarial {
|
||||||
bool number_overrun_in_object() {
|
bool number_overrun_in_object() {
|
||||||
TEST_START();
|
TEST_START();
|
||||||
constexpr const char *json = "{\"key\":1" PADDING_FILLED_WITH_NUMBERS "}";
|
constexpr const char *json = "{\"key\":1" PADDING_FILLED_WITH_NUMBERS "}";
|
||||||
constexpr size_t len = 8; // strlen("{\"key\":1");
|
constexpr size_t len = 8; // std::strlen("{\"key\":1");
|
||||||
|
|
||||||
dom::parser parser;
|
dom::parser parser;
|
||||||
uint64_t foo;
|
uint64_t foo;
|
||||||
|
@ -179,7 +179,7 @@ namespace adversarial {
|
||||||
TEST_SUCCEED();
|
TEST_SUCCEED();
|
||||||
}
|
}
|
||||||
bool run() {
|
bool run() {
|
||||||
static_assert(33 > SIMDJSON_PADDING, "corruption test doesn't have enough padding"); // 33 = strlen(PADDING_FILLED_WITH_NUMBERS)
|
static_assert(33 > SIMDJSON_PADDING, "corruption test doesn't have enough padding"); // 33 = std::strlen(PADDING_FILLED_WITH_NUMBERS)
|
||||||
return true
|
return true
|
||||||
&& number_overrun_at_root()
|
&& number_overrun_at_root()
|
||||||
&& number_overrun_in_array()
|
&& number_overrun_in_array()
|
||||||
|
|
|
@ -23,7 +23,7 @@ static bool has_extension(const char *filename, const char *extension) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool starts_with(const char *pre, const char *str) {
|
bool starts_with(const char *pre, const char *str) {
|
||||||
size_t len_pre = strlen(pre), len_str = strlen(str);
|
size_t len_pre = std::strlen(pre), len_str = std::strlen(str);
|
||||||
return len_str < len_pre ? false : strncmp(pre, str, len_pre) == 0;
|
return len_str < len_pre ? false : strncmp(pre, str, len_pre) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ bool contains(const char *pre, const char *str) {
|
||||||
bool validate(const char *dirname) {
|
bool validate(const char *dirname) {
|
||||||
bool everything_fine = true;
|
bool everything_fine = true;
|
||||||
const char *extension = ".json";
|
const char *extension = ".json";
|
||||||
size_t dirlen = strlen(dirname);
|
size_t dirlen = std::strlen(dirname);
|
||||||
struct dirent **entry_list;
|
struct dirent **entry_list;
|
||||||
int c = scandir(dirname, &entry_list, nullptr, alphasort);
|
int c = scandir(dirname, &entry_list, nullptr, alphasort);
|
||||||
if (c < 0) {
|
if (c < 0) {
|
||||||
|
@ -56,7 +56,7 @@ bool validate(const char *dirname) {
|
||||||
if (has_extension(name, extension)) {
|
if (has_extension(name, extension)) {
|
||||||
printf("validating: file %s ", name);
|
printf("validating: file %s ", name);
|
||||||
fflush(nullptr);
|
fflush(nullptr);
|
||||||
size_t namelen = strlen(name);
|
size_t namelen = std::strlen(name);
|
||||||
size_t fullpathlen = dirlen + 1 + namelen + 1;
|
size_t fullpathlen = dirlen + 1 + namelen + 1;
|
||||||
char *fullpath = static_cast<char *>(malloc(fullpathlen));
|
char *fullpath = static_cast<char *>(malloc(fullpathlen));
|
||||||
snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name);
|
snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name);
|
||||||
|
|
|
@ -21,7 +21,7 @@ static bool has_extension(const char *filename, const char *extension) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool starts_with(const char *pre, const char *str) {
|
bool starts_with(const char *pre, const char *str) {
|
||||||
size_t len_pre = strlen(pre), len_str = strlen(str);
|
size_t len_pre = std::strlen(pre), len_str = std::strlen(str);
|
||||||
return len_str < len_pre ? false : strncmp(pre, str, len_pre) == 0;
|
return len_str < len_pre ? false : strncmp(pre, str, len_pre) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,7 +32,7 @@ bool contains(const char *pre, const char *str) {
|
||||||
bool validate_minefield(const char *dirname) {
|
bool validate_minefield(const char *dirname) {
|
||||||
bool everything_fine = true;
|
bool everything_fine = true;
|
||||||
const char *extension = ".json";
|
const char *extension = ".json";
|
||||||
size_t dirlen = strlen(dirname);
|
size_t dirlen = std::strlen(dirname);
|
||||||
struct dirent **entry_list;
|
struct dirent **entry_list;
|
||||||
int c = scandir(dirname, &entry_list, nullptr, alphasort);
|
int c = scandir(dirname, &entry_list, nullptr, alphasort);
|
||||||
if (c < 0) {
|
if (c < 0) {
|
||||||
|
@ -54,7 +54,7 @@ bool validate_minefield(const char *dirname) {
|
||||||
if (has_extension(name, extension)) {
|
if (has_extension(name, extension)) {
|
||||||
printf("validating: file %s ", name);
|
printf("validating: file %s ", name);
|
||||||
fflush(nullptr);
|
fflush(nullptr);
|
||||||
size_t namelen = strlen(name);
|
size_t namelen = std::strlen(name);
|
||||||
size_t fullpathlen = dirlen + 1 + namelen + 1;
|
size_t fullpathlen = dirlen + 1 + namelen + 1;
|
||||||
char *fullpath = static_cast<char *>(malloc(fullpathlen));
|
char *fullpath = static_cast<char *>(malloc(fullpathlen));
|
||||||
snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name);
|
snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name);
|
||||||
|
|
|
@ -54,7 +54,7 @@ size_t invalid_count;
|
||||||
const char *really_bad[] = {"013}", "0x14", "0e]", "0e+]", "0e+-1]"};
|
const char *really_bad[] = {"013}", "0x14", "0e]", "0e+]", "0e+-1]"};
|
||||||
|
|
||||||
bool starts_with(const char *pre, const char *str) {
|
bool starts_with(const char *pre, const char *str) {
|
||||||
size_t lenpre = strlen(pre);
|
size_t lenpre = std::strlen(pre);
|
||||||
return strncmp(pre, str, lenpre) == 0;
|
return strncmp(pre, str, lenpre) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -168,7 +168,7 @@ bool validate(const char *dirname) {
|
||||||
parse_error = 0;
|
parse_error = 0;
|
||||||
size_t total_count = 0;
|
size_t total_count = 0;
|
||||||
const char *extension = ".json";
|
const char *extension = ".json";
|
||||||
size_t dirlen = strlen(dirname);
|
size_t dirlen = std::strlen(dirname);
|
||||||
struct dirent **entry_list;
|
struct dirent **entry_list;
|
||||||
int c = scandir(dirname, &entry_list, 0, alphasort);
|
int c = scandir(dirname, &entry_list, 0, alphasort);
|
||||||
if (c < 0) {
|
if (c < 0) {
|
||||||
|
@ -183,7 +183,7 @@ bool validate(const char *dirname) {
|
||||||
for (int i = 0; i < c; i++) {
|
for (int i = 0; i < c; i++) {
|
||||||
const char *name = entry_list[i]->d_name;
|
const char *name = entry_list[i]->d_name;
|
||||||
if (has_extension(name, extension)) {
|
if (has_extension(name, extension)) {
|
||||||
size_t filelen = strlen(name);
|
size_t filelen = std::strlen(name);
|
||||||
fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
|
fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
|
||||||
strcpy(fullpath, dirname);
|
strcpy(fullpath, dirname);
|
||||||
if (needsep) {
|
if (needsep) {
|
||||||
|
|
|
@ -22,7 +22,7 @@ static bool has_extension(const char *filename, const char *extension) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool starts_with(const char *pre, const char *str) {
|
bool starts_with(const char *pre, const char *str) {
|
||||||
size_t len_pre = strlen(pre), len_str = strlen(str);
|
size_t len_pre = std::strlen(pre), len_str = std::strlen(str);
|
||||||
return len_str < len_pre ? false : strncmp(pre, str, len_pre) == 0;
|
return len_str < len_pre ? false : strncmp(pre, str, len_pre) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ bool validate(const char *dirname) {
|
||||||
const char *extension2 = ".jsonl";
|
const char *extension2 = ".jsonl";
|
||||||
const char *extension3 = ".json"; // bad json files shoud fail
|
const char *extension3 = ".json"; // bad json files shoud fail
|
||||||
|
|
||||||
size_t dirlen = strlen(dirname);
|
size_t dirlen = std::strlen(dirname);
|
||||||
struct dirent **entry_list;
|
struct dirent **entry_list;
|
||||||
int c = scandir(dirname, &entry_list, nullptr, alphasort);
|
int c = scandir(dirname, &entry_list, nullptr, alphasort);
|
||||||
if (c < 0) {
|
if (c < 0) {
|
||||||
|
@ -63,7 +63,7 @@ bool validate(const char *dirname) {
|
||||||
/* Finding the file path */
|
/* Finding the file path */
|
||||||
printf("validating: file %s ", name);
|
printf("validating: file %s ", name);
|
||||||
fflush(nullptr);
|
fflush(nullptr);
|
||||||
size_t namelen = strlen(name);
|
size_t namelen = std::strlen(name);
|
||||||
size_t fullpathlen = dirlen + 1 + namelen + 1;
|
size_t fullpathlen = dirlen + 1 + namelen + 1;
|
||||||
char *fullpath = static_cast<char *>(malloc(fullpathlen));
|
char *fullpath = static_cast<char *>(malloc(fullpathlen));
|
||||||
snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name);
|
snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name);
|
||||||
|
|
|
@ -179,7 +179,7 @@ bool issue1142() {
|
||||||
ASSERT_EQUAL(std::string(R"([])"), simdjson::minify(example3));
|
ASSERT_EQUAL(std::string(R"([])"), simdjson::minify(example3));
|
||||||
|
|
||||||
const char * input_array = "[]";
|
const char * input_array = "[]";
|
||||||
size_t input_length = strlen(input_array);
|
size_t input_length = std::strlen(input_array);
|
||||||
auto element4 = parser.parse(input_array, input_length).at_pointer("");;
|
auto element4 = parser.parse(input_array, input_length).at_pointer("");;
|
||||||
ASSERT_EQUAL(std::string(R"([])"), simdjson::minify(element4));
|
ASSERT_EQUAL(std::string(R"([])"), simdjson::minify(element4));
|
||||||
|
|
||||||
|
|
|
@ -261,7 +261,7 @@ SIMDJSON_POP_DISABLE_WARNINGS
|
||||||
|
|
||||||
void minify() {
|
void minify() {
|
||||||
const char * some_string = "[ 1, 2, 3, 4] ";
|
const char * some_string = "[ 1, 2, 3, 4] ";
|
||||||
size_t length = strlen(some_string);
|
size_t length = std::strlen(some_string);
|
||||||
std::unique_ptr<char[]> buffer{new char[length]};
|
std::unique_ptr<char[]> buffer{new char[length]};
|
||||||
size_t new_length{};
|
size_t new_length{};
|
||||||
auto error = simdjson::minify(some_string, length, buffer.get(), new_length);
|
auto error = simdjson::minify(some_string, length, buffer.get(), new_length);
|
||||||
|
@ -270,7 +270,7 @@ void minify() {
|
||||||
abort();
|
abort();
|
||||||
} else {
|
} else {
|
||||||
const char * expected_string = "[1,2,3,4]";
|
const char * expected_string = "[1,2,3,4]";
|
||||||
size_t expected_length = strlen(expected_string);
|
size_t expected_length = std::strlen(expected_string);
|
||||||
if(expected_length != new_length) {
|
if(expected_length != new_length) {
|
||||||
std::cerr << "mismatched length (error) " << std::endl;
|
std::cerr << "mismatched length (error) " << std::endl;
|
||||||
abort();
|
abort();
|
||||||
|
@ -286,14 +286,14 @@ void minify() {
|
||||||
|
|
||||||
bool is_correct() {
|
bool is_correct() {
|
||||||
const char * some_string = "[ 1, 2, 3, 4] ";
|
const char * some_string = "[ 1, 2, 3, 4] ";
|
||||||
size_t length = strlen(some_string);
|
size_t length = std::strlen(some_string);
|
||||||
bool is_ok = simdjson::validate_utf8(some_string, length);
|
bool is_ok = simdjson::validate_utf8(some_string, length);
|
||||||
return is_ok;
|
return is_ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_correct_string_view() {
|
bool is_correct_string_view() {
|
||||||
const char * some_string = "[ 1, 2, 3, 4] ";
|
const char * some_string = "[ 1, 2, 3, 4] ";
|
||||||
size_t length = strlen(some_string);
|
size_t length = std::strlen(some_string);
|
||||||
std::string_view v(some_string, length);
|
std::string_view v(some_string, length);
|
||||||
bool is_ok = simdjson::validate_utf8(v);
|
bool is_ok = simdjson::validate_utf8(v);
|
||||||
return is_ok;
|
return is_ok;
|
||||||
|
@ -305,6 +305,31 @@ bool is_correct_string() {
|
||||||
return is_ok;
|
return is_ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void parse_documentation() {
|
||||||
|
const char *json = R"({"key":"value"})";
|
||||||
|
const size_t json_len = std::strlen(json);
|
||||||
|
simdjson::dom::parser parser;
|
||||||
|
simdjson::dom::element element = parser.parse(json, json_len);
|
||||||
|
// Next line is to avoid unused warning.
|
||||||
|
(void)element;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void parse_documentation_lowlevel() {
|
||||||
|
// Such low-level code is not generally recommended. Please
|
||||||
|
// see parse_documentation() instead.
|
||||||
|
// Motivation: https://github.com/simdjson/simdjson/issues/1175
|
||||||
|
const char *json = R"({"key":"value"})";
|
||||||
|
const size_t json_len = std::strlen(json);
|
||||||
|
std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]};
|
||||||
|
std::memcpy(padded_json_copy.get(), json, json_len);
|
||||||
|
std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING);
|
||||||
|
simdjson::dom::parser parser;
|
||||||
|
simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false);
|
||||||
|
// Next line is to avoid unused warning.
|
||||||
|
(void)element;
|
||||||
|
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
basics_dom_1();
|
basics_dom_1();
|
||||||
basics_dom_2();
|
basics_dom_2();
|
||||||
|
|
|
@ -305,7 +305,7 @@ static bool has_extension(const char *filename, const char *extension) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool starts_with(const char *pre, const char *str) {
|
bool starts_with(const char *pre, const char *str) {
|
||||||
size_t lenpre = strlen(pre), lenstr = strlen(str);
|
size_t lenpre = std::strlen(pre), lenstr = std::strlen(str);
|
||||||
return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
|
return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -313,7 +313,7 @@ bool validate(const char *dirname) {
|
||||||
size_t total_strings = 0;
|
size_t total_strings = 0;
|
||||||
probable_bug = false;
|
probable_bug = false;
|
||||||
const char *extension = ".json";
|
const char *extension = ".json";
|
||||||
size_t dirlen = strlen(dirname);
|
size_t dirlen = std::strlen(dirname);
|
||||||
struct dirent **entry_list;
|
struct dirent **entry_list;
|
||||||
int c = scandir(dirname, &entry_list, 0, alphasort);
|
int c = scandir(dirname, &entry_list, 0, alphasort);
|
||||||
if (c < 0) {
|
if (c < 0) {
|
||||||
|
@ -328,7 +328,7 @@ bool validate(const char *dirname) {
|
||||||
for (int i = 0; i < c; i++) {
|
for (int i = 0; i < c; i++) {
|
||||||
const char *name = entry_list[i]->d_name;
|
const char *name = entry_list[i]->d_name;
|
||||||
if (has_extension(name, extension)) {
|
if (has_extension(name, extension)) {
|
||||||
size_t filelen = strlen(name);
|
size_t filelen = std::strlen(name);
|
||||||
fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
|
fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
|
||||||
strcpy(fullpath, dirname);
|
strcpy(fullpath, dirname);
|
||||||
if (needsep) {
|
if (needsep) {
|
||||||
|
|
|
@ -224,14 +224,14 @@ void test() {
|
||||||
"\x91\x85\x95\x9e",
|
"\x91\x85\x95\x9e",
|
||||||
"\x6c\x02\x8e\x18"};
|
"\x6c\x02\x8e\x18"};
|
||||||
for (size_t i = 0; i < 8; i++) {
|
for (size_t i = 0; i < 8; i++) {
|
||||||
size_t len = strlen(goodsequences[i]);
|
size_t len = std::strlen(goodsequences[i]);
|
||||||
if (!simdjson::validate_utf8(goodsequences[i], len)) {
|
if (!simdjson::validate_utf8(goodsequences[i], len)) {
|
||||||
printf("bug goodsequences[%zu]\n", i);
|
printf("bug goodsequences[%zu]\n", i);
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (size_t i = 0; i < 26; i++) {
|
for (size_t i = 0; i < 26; i++) {
|
||||||
size_t len = strlen(badsequences[i]);
|
size_t len = std::strlen(badsequences[i]);
|
||||||
if (simdjson::validate_utf8(badsequences[i], len)) {
|
if (simdjson::validate_utf8(badsequences[i], len)) {
|
||||||
printf("bug lookup2 badsequences[%zu]\n", i);
|
printf("bug lookup2 badsequences[%zu]\n", i);
|
||||||
abort();
|
abort();
|
||||||
|
|
Loading…
Reference in New Issue