Improve documentation on padding

- Improves and clarifies the documentation on padding.
 - Use std:: prefix for memcpy, strlen etc.

Related to issues #1175 and #1178
This commit is contained in:
Daniel Lemire 2020-09-23 03:07:14 -04:00 committed by GitHub
parent 19cb5d57db
commit f410213003
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 129 additions and 58 deletions

View File

@ -8,7 +8,7 @@
#include <unistd.h> // for syscall
#include <cerrno> // for errno
#include <cstring> // for memset
#include <cstring> // for std::memset
#include <stdexcept>
#include <iostream>
@ -24,7 +24,7 @@ template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
public:
explicit LinuxEvents(std::vector<int> config_vec) : fd(0), working(true) {
memset(&attribs, 0, sizeof(attribs));
std::memset(&attribs, 0, sizeof(attribs));
attribs.type = TYPE;
attribs.size = sizeof(attribs);
attribs.disabled = 1;

View File

@ -296,8 +296,8 @@ simdjson_really_inline void sax_tweet_reader_visitor::field_lookup::neg(const ch
}
sax_tweet_reader_visitor::field_lookup::field_lookup() {
add("\"statuses\"", strlen("\"statuses\""), containers::top_object, field_type::array, 0); // { "statuses": [...]
#define TWEET_FIELD(KEY, TYPE) add("\"" #KEY "\"", strlen("\"" #KEY "\""), containers::tweet, TYPE, offsetof(tweet, KEY));
add("\"statuses\"", std::strlen("\"statuses\""), containers::top_object, field_type::array, 0); // { "statuses": [...]
#define TWEET_FIELD(KEY, TYPE) add("\"" #KEY "\"", std::strlen("\"" #KEY "\""), containers::tweet, TYPE, offsetof(tweet, KEY));
TWEET_FIELD(id, field_type::unsigned_integer);
TWEET_FIELD(in_reply_to_status_id, field_type::nullable_unsigned_integer);
TWEET_FIELD(retweet_count, field_type::unsigned_integer);
@ -306,7 +306,7 @@ sax_tweet_reader_visitor::field_lookup::field_lookup() {
TWEET_FIELD(created_at, field_type::string);
TWEET_FIELD(user, field_type::object)
#undef TWEET_FIELD
#define USER_FIELD(KEY, TYPE) add("\"" #KEY "\"", strlen("\"" #KEY "\""), containers::user, TYPE, offsetof(tweet, user)+offsetof(twitter_user, KEY));
#define USER_FIELD(KEY, TYPE) add("\"" #KEY "\"", std::strlen("\"" #KEY "\""), containers::user, TYPE, offsetof(tweet, user)+offsetof(twitter_user, KEY));
USER_FIELD(id, field_type::unsigned_integer);
USER_FIELD(screen_name, field_type::string);
#undef USER_FIELD

View File

@ -278,7 +278,7 @@ In some cases, you may have valid JSON strings that you do not wish to parse but
// Starts with a valid JSON document as a string.
// It does not have to be null-terminated.
const char * some_string = "[ 1, 2, 3, 4] ";
size_t length = strlen(some_string);
size_t length = std::strlen(some_string);
// Create a buffer to receive the minified string. Make sure that there is enough room (length bytes).
std::unique_ptr<char[]> buffer{new char[length]};
size_t new_length{}; // It will receive the minified length.
@ -296,7 +296,7 @@ The simdjson library has fast functions to validate UTF-8 strings. They are many
```C++
const char * some_string = "[ 1, 2, 3, 4] ";
size_t length = strlen(some_string);
size_t length = std::strlen(some_string);
bool is_ok = simdjson::validate_utf8(some_string, length);
```

View File

@ -260,7 +260,7 @@ In some cases, you may have valid JSON strings that you do not wish to parse but
// Starts with a valid JSON document as a string.
// It does not have to be null-terminated.
const char * some_string = "[ 1, 2, 3, 4] ";
size_t length = strlen(some_string);
size_t length = std::strlen(some_string);
// Create a buffer to receive the minified string. Make sure that there is enough room (length bytes).
std::unique_ptr<char[]> buffer{new char[length]};
size_t new_length{}; // It will receive the minified length.
@ -278,7 +278,7 @@ The simdjson library has fast functions to validate UTF-8 strings. They are many
```
const char * some_string = "[ 1, 2, 3, 4] ";
size_t length = strlen(some_string);
size_t length = std::strlen(some_string);
bool is_ok = simdjson::validate_utf8(some_string, length);
```

View File

@ -12,6 +12,8 @@ are still some scenarios where tuning can enhance performance.
* [Visual Studio](#visual-studio)
* [Downclocking](#downclocking)
* [Best Use of the DOM API](#best-use-of-the-dom-api)
* [Padding and Temporary Copies](#padding-and-temporary-copies)
Reusing the parser for maximum efficiency
-----------------------------------------
@ -174,3 +176,25 @@ Best Use of the DOM API
The simdjson API provides access to the JSON DOM (document-object-model) content as a tree of `dom::element` instances, each representing an object, an array or an atomic type (null, true, false, number). These `dom::element` instances are lightweight objects (e.g., spanning 16 bytes) and it might be advantageous to pass them by value, as opposed to passing them by reference or by pointer.
Padding and Temporary Copies
--------------
The simdjson function `parser.parse` reads data from a padded buffer, containing SIMDJSON_PADDING extra bytes added at the end.
If you are passing a `padded_string` to `parser.parse` or loading the JSON directly from
disk (`parser.load`), padding is automatically handled.
When calling `parser.parse` on a pointer (e.g., `parser.parse(mystring, mylength)`) a temporary copy is made by default with adequate padding and you, again, do not need to be concerned with padding.
Some users may not be able to use our `padded_string` class or to load the data directly from disk (`parser.load`). They may need to pass data pointers to the library. If these users wish to avoid temporary copies and corresponding temporary memory allocations, they may want to call `parser.parse` with the `realloc_if_needed` parameter set to false (e.g., `parser.parse(mystring, mylength, false)`). In such cases, they need to ensure that there are at least SIMDJSON_PADDING extra bytes at the end that can be safely accessed and read. They do not need to initialize the padded bytes to any value in particular. The following example is safe:
```C++
const char *json = R"({"key":"value"})";
const size_t json_len = std::strlen(json);
std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]};
std::memcpy(padded_json_copy.get(), json, json_len);
std::memset(padded_json_copy.get() + json_len, 0, SIMDJSON_PADDING);
simdjson::dom::parser parser;
simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false);
```
Setting the `realloc_if_needed` parameter to false in this manner may lead to better performance, but it requires that the user take on more responsibility: the simdjson library cannot verify that the input buffer was padded.

View File

@ -66,7 +66,7 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept {
switch (type) {
case '"': // we have a string
os << "string \"";
memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t));
std::memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t));
os << internal::escape_json_string(std::string_view(
(const char *)(string_buf.get() + payload + sizeof(uint32_t)),
string_length
@ -92,7 +92,7 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept {
return false;
}
double answer;
memcpy(&answer, &tape[++tape_idx], sizeof(answer));
std::memcpy(&answer, &tape[++tape_idx], sizeof(answer));
os << answer << '\n';
break;
case 'n': // we have a null

View File

@ -252,7 +252,7 @@ dom::parser::Iterator::Iterator(
current_val(o.current_val)
{
depth_index = new scopeindex_t[max_depth+1];
memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0]));
std::memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0]));
}
dom::parser::Iterator::~Iterator() noexcept {

View File

@ -78,7 +78,7 @@ public:
// return the length of the string in bytes
inline uint32_t get_string_length() const {
uint32_t answer;
memcpy(&answer,
std::memcpy(&answer,
reinterpret_cast<const char *>(doc.string_buf.get() +
(current_val & internal::JSON_VALUE_MASK)),
sizeof(uint32_t));
@ -93,7 +93,7 @@ public:
// case of error
}
double answer;
memcpy(&answer, &doc.tape[location + 1], sizeof(answer));
std::memcpy(&answer, &doc.tape[location + 1], sizeof(answer));
return answer;
}

View File

@ -98,7 +98,7 @@ inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bo
if (realloc_if_needed) {
tmp_buf.reset((uint8_t *)internal::allocate_padded_buffer(len));
if (tmp_buf.get() == nullptr) { return MEMALLOC; }
memcpy((void *)tmp_buf.get(), buf, len);
std::memcpy((void *)tmp_buf.get(), buf, len);
}
_error = implementation->parse(realloc_if_needed ? tmp_buf.get() : buf, len, doc);
if (_error) { return _error; }

View File

@ -114,8 +114,30 @@ public:
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
* those bytes are initialized to, as long as they are allocated.
*
* If realloc_if_needed is true, it is assumed that the buffer does *not* have enough padding,
* and it is copied into an enlarged temporary buffer before parsing.
* If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding,
* and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe:
*
* const char *json = R"({"key":"value"})";
* const size_t json_len = std::strlen(json);
* simdjson::dom::parser parser;
* simdjson::dom::element element = parser.parse(json, json_len);
*
* If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)),
* you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end.
* The benefit of setting realloc_if_needed to false is that you avoid a temporary
* memory allocation and a copy.
*
* The padded bytes may be read. It is not important how you initialize
* these bytes though we recommend a sensible default like null character values or spaces.
* For example, the following low-level code is safe:
*
* const char *json = R"({"key":"value"})";
* const size_t json_len = std::strlen(json);
* std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]};
* std::memcpy(padded_json_copy.get(), json, json_len);
* std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING);
* simdjson::dom::parser parser;
* simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false);
*
* ### Parser Capacity
*

View File

@ -43,7 +43,7 @@ enum error_code {
*
* dom::parser parser;
* dom::element doc;
* auto error = parser.parse("foo").get(doc);
* auto error = parser.parse("foo",3).get(doc);
* if (error) { printf("Error: %s\n", error_message(error)); }
*
* @return The error message.

View File

@ -81,14 +81,14 @@ simdjson_really_inline T tape_ref::next_tape_value() const noexcept {
// It is not generally safe. It is safer, and often faster to rely
// on memcpy. Yes, it is uglier, but it is also encapsulated.
T x;
memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t));
std::memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t));
return x;
}
simdjson_really_inline uint32_t internal::tape_ref::get_string_length() const noexcept {
size_t string_buf_index = size_t(tape_value());
uint32_t len;
memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
std::memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
return len;
}

View File

@ -27,7 +27,7 @@ inline char *allocate_padded_buffer(size_t length) noexcept {
// We write zeroes in the padded region to avoid having uninitialized
// garbage. If nothing else, garbage getting read might trigger a
// warning in a memory checker.
memset(padded_buffer + length, 0, totalpaddedlength - length);
std::memset(padded_buffer + length, 0, totalpaddedlength - length);
return padded_buffer;
} // allocate_padded_buffer()
@ -43,7 +43,7 @@ inline padded_string::padded_string(size_t length) noexcept
inline padded_string::padded_string(const char *data, size_t length) noexcept
: viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) {
if ((data != nullptr) and (data_ptr != nullptr)) {
memcpy(data_ptr, data, length);
std::memcpy(data_ptr, data, length);
data_ptr[length] = '\0'; // easier when you need a c_str
}
}
@ -51,7 +51,7 @@ inline padded_string::padded_string(const char *data, size_t length) noexcept
inline padded_string::padded_string(const std::string & str_ ) noexcept
: viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) {
if (data_ptr != nullptr) {
memcpy(data_ptr, str_.data(), str_.size());
std::memcpy(data_ptr, str_.data(), str_.size());
data_ptr[str_.size()] = '\0'; // easier when you need a c_str
}
}
@ -59,7 +59,7 @@ inline padded_string::padded_string(const std::string & str_ ) noexcept
inline padded_string::padded_string(std::string_view sv_) noexcept
: viable_size(sv_.size()), data_ptr(internal::allocate_padded_buffer(sv_.size())) {
if (data_ptr != nullptr) {
memcpy(data_ptr, sv_.data(), sv_.size());
std::memcpy(data_ptr, sv_.data(), sv_.size());
data_ptr[sv_.size()] = '\0'; // easier when you need a c_str
}
}

View File

@ -76,8 +76,8 @@ simdjson_really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block()
template<size_t STEP_SIZE>
simdjson_really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers
memset(dst, 0x20, STEP_SIZE); // memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
memcpy(dst, buf + idx, len - idx);
std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
std::memcpy(dst, buf + idx, len - idx);
return len - idx;
}

View File

@ -179,8 +179,8 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_root_
//
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING));
if (copy == nullptr) { return MEMALLOC; }
memcpy(copy, value, iter.remaining_len());
memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING);
std::memcpy(copy, value, iter.remaining_len());
std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING);
error_code error = visit_number(iter, copy);
free(copy);
return error;

View File

@ -255,10 +255,10 @@ namespace parse_api_tests {
uint64_t count = 0;
constexpr const int BATCH_SIZE = 128;
uint8_t empty_batches_ndjson[BATCH_SIZE*16+SIMDJSON_PADDING];
memset(&empty_batches_ndjson[0], ' ', BATCH_SIZE*16+SIMDJSON_PADDING);
memcpy(&empty_batches_ndjson[BATCH_SIZE*3+2], "1", 1);
memcpy(&empty_batches_ndjson[BATCH_SIZE*10+4], "2", 1);
memcpy(&empty_batches_ndjson[BATCH_SIZE*11+6], "3", 1);
std::memset(&empty_batches_ndjson[0], ' ', BATCH_SIZE*16+SIMDJSON_PADDING);
std::memcpy(&empty_batches_ndjson[BATCH_SIZE*3+2], "1", 1);
std::memcpy(&empty_batches_ndjson[BATCH_SIZE*10+4], "2", 1);
std::memcpy(&empty_batches_ndjson[BATCH_SIZE*11+6], "3", 1);
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(empty_batches_ndjson, BATCH_SIZE*16).get(stream) );
for (auto doc : stream) {

View File

@ -150,7 +150,7 @@ namespace adversarial {
bool number_overrun_at_root() {
TEST_START();
constexpr const char *json = "1" PADDING_FILLED_WITH_NUMBERS ",";
constexpr size_t len = 1; // strlen("1");
constexpr size_t len = 1; // std::strlen("1");
dom::parser parser;
uint64_t foo;
@ -161,7 +161,7 @@ namespace adversarial {
bool number_overrun_in_array() {
TEST_START();
constexpr const char *json = "[1" PADDING_FILLED_WITH_NUMBERS "]";
constexpr size_t len = 2; // strlen("[1");
constexpr size_t len = 2; // std::strlen("[1");
dom::parser parser;
uint64_t foo;
@ -171,7 +171,7 @@ namespace adversarial {
bool number_overrun_in_object() {
TEST_START();
constexpr const char *json = "{\"key\":1" PADDING_FILLED_WITH_NUMBERS "}";
constexpr size_t len = 8; // strlen("{\"key\":1");
constexpr size_t len = 8; // std::strlen("{\"key\":1");
dom::parser parser;
uint64_t foo;
@ -179,7 +179,7 @@ namespace adversarial {
TEST_SUCCEED();
}
bool run() {
static_assert(33 > SIMDJSON_PADDING, "corruption test doesn't have enough padding"); // 33 = strlen(PADDING_FILLED_WITH_NUMBERS)
static_assert(33 > SIMDJSON_PADDING, "corruption test doesn't have enough padding"); // 33 = std::strlen(PADDING_FILLED_WITH_NUMBERS)
return true
&& number_overrun_at_root()
&& number_overrun_in_array()

View File

@ -23,7 +23,7 @@ static bool has_extension(const char *filename, const char *extension) {
}
bool starts_with(const char *pre, const char *str) {
size_t len_pre = strlen(pre), len_str = strlen(str);
size_t len_pre = std::strlen(pre), len_str = std::strlen(str);
return len_str < len_pre ? false : strncmp(pre, str, len_pre) == 0;
}
@ -34,7 +34,7 @@ bool contains(const char *pre, const char *str) {
bool validate(const char *dirname) {
bool everything_fine = true;
const char *extension = ".json";
size_t dirlen = strlen(dirname);
size_t dirlen = std::strlen(dirname);
struct dirent **entry_list;
int c = scandir(dirname, &entry_list, nullptr, alphasort);
if (c < 0) {
@ -56,7 +56,7 @@ bool validate(const char *dirname) {
if (has_extension(name, extension)) {
printf("validating: file %s ", name);
fflush(nullptr);
size_t namelen = strlen(name);
size_t namelen = std::strlen(name);
size_t fullpathlen = dirlen + 1 + namelen + 1;
char *fullpath = static_cast<char *>(malloc(fullpathlen));
snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name);

View File

@ -21,7 +21,7 @@ static bool has_extension(const char *filename, const char *extension) {
}
bool starts_with(const char *pre, const char *str) {
size_t len_pre = strlen(pre), len_str = strlen(str);
size_t len_pre = std::strlen(pre), len_str = std::strlen(str);
return len_str < len_pre ? false : strncmp(pre, str, len_pre) == 0;
}
@ -32,7 +32,7 @@ bool contains(const char *pre, const char *str) {
bool validate_minefield(const char *dirname) {
bool everything_fine = true;
const char *extension = ".json";
size_t dirlen = strlen(dirname);
size_t dirlen = std::strlen(dirname);
struct dirent **entry_list;
int c = scandir(dirname, &entry_list, nullptr, alphasort);
if (c < 0) {
@ -54,7 +54,7 @@ bool validate_minefield(const char *dirname) {
if (has_extension(name, extension)) {
printf("validating: file %s ", name);
fflush(nullptr);
size_t namelen = strlen(name);
size_t namelen = std::strlen(name);
size_t fullpathlen = dirlen + 1 + namelen + 1;
char *fullpath = static_cast<char *>(malloc(fullpathlen));
snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name);

View File

@ -54,7 +54,7 @@ size_t invalid_count;
const char *really_bad[] = {"013}", "0x14", "0e]", "0e+]", "0e+-1]"};
bool starts_with(const char *pre, const char *str) {
size_t lenpre = strlen(pre);
size_t lenpre = std::strlen(pre);
return strncmp(pre, str, lenpre) == 0;
}
@ -168,7 +168,7 @@ bool validate(const char *dirname) {
parse_error = 0;
size_t total_count = 0;
const char *extension = ".json";
size_t dirlen = strlen(dirname);
size_t dirlen = std::strlen(dirname);
struct dirent **entry_list;
int c = scandir(dirname, &entry_list, 0, alphasort);
if (c < 0) {
@ -183,7 +183,7 @@ bool validate(const char *dirname) {
for (int i = 0; i < c; i++) {
const char *name = entry_list[i]->d_name;
if (has_extension(name, extension)) {
size_t filelen = strlen(name);
size_t filelen = std::strlen(name);
fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
strcpy(fullpath, dirname);
if (needsep) {

View File

@ -22,7 +22,7 @@ static bool has_extension(const char *filename, const char *extension) {
}
bool starts_with(const char *pre, const char *str) {
size_t len_pre = strlen(pre), len_str = strlen(str);
size_t len_pre = std::strlen(pre), len_str = std::strlen(str);
return len_str < len_pre ? false : strncmp(pre, str, len_pre) == 0;
}
@ -36,7 +36,7 @@ bool validate(const char *dirname) {
const char *extension2 = ".jsonl";
const char *extension3 = ".json"; // bad json files should fail
size_t dirlen = strlen(dirname);
size_t dirlen = std::strlen(dirname);
struct dirent **entry_list;
int c = scandir(dirname, &entry_list, nullptr, alphasort);
if (c < 0) {
@ -63,7 +63,7 @@ bool validate(const char *dirname) {
/* Finding the file path */
printf("validating: file %s ", name);
fflush(nullptr);
size_t namelen = strlen(name);
size_t namelen = std::strlen(name);
size_t fullpathlen = dirlen + 1 + namelen + 1;
char *fullpath = static_cast<char *>(malloc(fullpathlen));
snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name);

View File

@ -179,7 +179,7 @@ bool issue1142() {
ASSERT_EQUAL(std::string(R"([])"), simdjson::minify(example3));
const char * input_array = "[]";
size_t input_length = strlen(input_array);
size_t input_length = std::strlen(input_array);
auto element4 = parser.parse(input_array, input_length).at_pointer("");;
ASSERT_EQUAL(std::string(R"([])"), simdjson::minify(element4));

View File

@ -261,7 +261,7 @@ SIMDJSON_POP_DISABLE_WARNINGS
void minify() {
const char * some_string = "[ 1, 2, 3, 4] ";
size_t length = strlen(some_string);
size_t length = std::strlen(some_string);
std::unique_ptr<char[]> buffer{new char[length]};
size_t new_length{};
auto error = simdjson::minify(some_string, length, buffer.get(), new_length);
@ -270,7 +270,7 @@ void minify() {
abort();
} else {
const char * expected_string = "[1,2,3,4]";
size_t expected_length = strlen(expected_string);
size_t expected_length = std::strlen(expected_string);
if(expected_length != new_length) {
std::cerr << "mismatched length (error) " << std::endl;
abort();
@ -286,14 +286,14 @@ void minify() {
bool is_correct() {
const char * some_string = "[ 1, 2, 3, 4] ";
size_t length = strlen(some_string);
size_t length = std::strlen(some_string);
bool is_ok = simdjson::validate_utf8(some_string, length);
return is_ok;
}
bool is_correct_string_view() {
const char * some_string = "[ 1, 2, 3, 4] ";
size_t length = strlen(some_string);
size_t length = std::strlen(some_string);
std::string_view v(some_string, length);
bool is_ok = simdjson::validate_utf8(v);
return is_ok;
@ -305,6 +305,31 @@ bool is_correct_string() {
return is_ok;
}
// Mirrors the basic parsing example from the parser documentation: a JSON
// literal is parsed through a plain pointer + length, with no manual padding.
void parse_documentation() {
const char *json = R"({"key":"value"})";
// Length in bytes, not counting the terminating null character.
const size_t json_len = std::strlen(json);
simdjson::dom::parser parser;
// realloc_if_needed is left at its default here; per the parser documentation
// the input is then copied into a padded temporary buffer before parsing.
simdjson::dom::element element = parser.parse(json, json_len);
// Next line is to avoid unused warning.
(void)element;
}
// Mirrors the low-level documentation example: the caller builds its own
// padded copy of the input and asks the parser not to make a temporary copy.
void parse_documentation_lowlevel() {
// Such low-level code is not generally recommended. Please
// see parse_documentation() instead.
// Motivation: https://github.com/simdjson/simdjson/issues/1175
const char *json = R"({"key":"value"})";
const size_t json_len = std::strlen(json);
// Allocate room for the document plus SIMDJSON_PADDING extra bytes.
std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]};
std::memcpy(padded_json_copy.get(), json, json_len);
// Zero the padding bytes. The parser documentation states that their value
// does not matter, only that they are allocated and may be read.
std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING);
simdjson::dom::parser parser;
// Third argument (realloc_if_needed) is false: the parser reads the buffer
// as given, relying on the padding provided above.
simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false);
// Next line is to avoid unused warning.
(void)element;
}
int main() {
basics_dom_1();
basics_dom_2();

View File

@ -305,7 +305,7 @@ static bool has_extension(const char *filename, const char *extension) {
}
bool starts_with(const char *pre, const char *str) {
size_t lenpre = strlen(pre), lenstr = strlen(str);
size_t lenpre = std::strlen(pre), lenstr = std::strlen(str);
return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
}
@ -313,7 +313,7 @@ bool validate(const char *dirname) {
size_t total_strings = 0;
probable_bug = false;
const char *extension = ".json";
size_t dirlen = strlen(dirname);
size_t dirlen = std::strlen(dirname);
struct dirent **entry_list;
int c = scandir(dirname, &entry_list, 0, alphasort);
if (c < 0) {
@ -328,7 +328,7 @@ bool validate(const char *dirname) {
for (int i = 0; i < c; i++) {
const char *name = entry_list[i]->d_name;
if (has_extension(name, extension)) {
size_t filelen = strlen(name);
size_t filelen = std::strlen(name);
fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
strcpy(fullpath, dirname);
if (needsep) {

View File

@ -224,14 +224,14 @@ void test() {
"\x91\x85\x95\x9e",
"\x6c\x02\x8e\x18"};
for (size_t i = 0; i < 8; i++) {
size_t len = strlen(goodsequences[i]);
size_t len = std::strlen(goodsequences[i]);
if (!simdjson::validate_utf8(goodsequences[i], len)) {
printf("bug goodsequences[%zu]\n", i);
abort();
}
}
for (size_t i = 0; i < 26; i++) {
size_t len = strlen(badsequences[i]);
size_t len = std::strlen(badsequences[i]);
if (simdjson::validate_utf8(badsequences[i], len)) {
printf("bug lookup2 badsequences[%zu]\n", i);
abort();