From f410213003b378d6f7edf17a9a44c94e75bc36ba Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 23 Sep 2020 03:07:14 -0400 Subject: [PATCH] Improve documentation on padding - Improves and clarifies the documentation on padding. - Use std:: prefix for memcpy, strlen etc. Related to issues #1175 and #1178 --- benchmark/linux/linux-perf-events.h | 4 +-- benchmark/twitter/sax_tweet_reader_visitor.h | 6 ++-- doc/basics.md | 4 +-- doc/basics_doxygen.md | 4 +-- doc/performance.md | 24 ++++++++++++++ include/simdjson/dom/document-inl.h | 4 +-- .../simdjson/dom/parsedjson_iterator-inl.h | 2 +- include/simdjson/dom/parsedjson_iterator.h | 4 +-- include/simdjson/dom/parser-inl.h | 2 +- include/simdjson/dom/parser.h | 26 +++++++++++++-- include/simdjson/error.h | 2 +- include/simdjson/internal/tape_ref-inl.h | 4 +-- include/simdjson/padded_string-inl.h | 8 ++--- src/generic/stage1/buf_block_reader.h | 4 +-- src/generic/stage2/tape_builder.h | 4 +-- tests/basictests.cpp | 8 ++--- tests/errortests.cpp | 8 ++--- tests/jsoncheck.cpp | 6 ++-- tests/minefieldcheck.cpp | 6 ++-- tests/numberparsingcheck.cpp | 6 ++-- tests/parse_many_test.cpp | 6 ++-- tests/pointercheck.cpp | 2 +- tests/readme_examples.cpp | 33 ++++++++++++++++--- tests/stringparsingcheck.cpp | 6 ++-- tests/unicode_tests.cpp | 4 +-- 25 files changed, 129 insertions(+), 58 deletions(-) diff --git a/benchmark/linux/linux-perf-events.h b/benchmark/linux/linux-perf-events.h index 0f411e0e..1867b273 100644 --- a/benchmark/linux/linux-perf-events.h +++ b/benchmark/linux/linux-perf-events.h @@ -8,7 +8,7 @@ #include // for syscall #include // for errno -#include // for memset +#include // for std::memset #include #include @@ -24,7 +24,7 @@ template class LinuxEvents { public: explicit LinuxEvents(std::vector config_vec) : fd(0), working(true) { - memset(&attribs, 0, sizeof(attribs)); + std::memset(&attribs, 0, sizeof(attribs)); attribs.type = TYPE; attribs.size = sizeof(attribs); attribs.disabled = 1; diff --git 
a/benchmark/twitter/sax_tweet_reader_visitor.h b/benchmark/twitter/sax_tweet_reader_visitor.h index 330f0743..f4781752 100644 --- a/benchmark/twitter/sax_tweet_reader_visitor.h +++ b/benchmark/twitter/sax_tweet_reader_visitor.h @@ -296,8 +296,8 @@ simdjson_really_inline void sax_tweet_reader_visitor::field_lookup::neg(const ch } sax_tweet_reader_visitor::field_lookup::field_lookup() { - add("\"statuses\"", strlen("\"statuses\""), containers::top_object, field_type::array, 0); // { "statuses": [...] - #define TWEET_FIELD(KEY, TYPE) add("\"" #KEY "\"", strlen("\"" #KEY "\""), containers::tweet, TYPE, offsetof(tweet, KEY)); + add("\"statuses\"", std::strlen("\"statuses\""), containers::top_object, field_type::array, 0); // { "statuses": [...] + #define TWEET_FIELD(KEY, TYPE) add("\"" #KEY "\"", std::strlen("\"" #KEY "\""), containers::tweet, TYPE, offsetof(tweet, KEY)); TWEET_FIELD(id, field_type::unsigned_integer); TWEET_FIELD(in_reply_to_status_id, field_type::nullable_unsigned_integer); TWEET_FIELD(retweet_count, field_type::unsigned_integer); @@ -306,7 +306,7 @@ sax_tweet_reader_visitor::field_lookup::field_lookup() { TWEET_FIELD(created_at, field_type::string); TWEET_FIELD(user, field_type::object) #undef TWEET_FIELD - #define USER_FIELD(KEY, TYPE) add("\"" #KEY "\"", strlen("\"" #KEY "\""), containers::user, TYPE, offsetof(tweet, user)+offsetof(twitter_user, KEY)); + #define USER_FIELD(KEY, TYPE) add("\"" #KEY "\"", std::strlen("\"" #KEY "\""), containers::user, TYPE, offsetof(tweet, user)+offsetof(twitter_user, KEY)); USER_FIELD(id, field_type::unsigned_integer); USER_FIELD(screen_name, field_type::string); #undef USER_FIELD diff --git a/doc/basics.md b/doc/basics.md index 68cd4ccd..fd896f04 100644 --- a/doc/basics.md +++ b/doc/basics.md @@ -278,7 +278,7 @@ In some cases, you may have valid JSON strings that you do not wish to parse but // Starts with a valid JSON document as a string. // It does not have to be null-terminated. 
const char * some_string = "[ 1, 2, 3, 4] "; - size_t length = strlen(some_string); + size_t length = std::strlen(some_string); // Create a buffer to receive the minified string. Make sure that there is enough room (length bytes). std::unique_ptr buffer{new char[length]}; size_t new_length{}; // It will receive the minified length. @@ -296,7 +296,7 @@ The simdjson library has fast functions to validate UTF-8 strings. They are many ```C++ const char * some_string = "[ 1, 2, 3, 4] "; - size_t length = strlen(some_string); + size_t length = std::strlen(some_string); bool is_ok = simdjson::validate_utf8(some_string, length); ``` diff --git a/doc/basics_doxygen.md b/doc/basics_doxygen.md index 14fa5973..1b0facad 100644 --- a/doc/basics_doxygen.md +++ b/doc/basics_doxygen.md @@ -260,7 +260,7 @@ In some cases, you may have valid JSON strings that you do not wish to parse but // Starts with a valid JSON document as a string. // It does not have to be null-terminated. const char * some_string = "[ 1, 2, 3, 4] "; - size_t length = strlen(some_string); + size_t length = std::strlen(some_string); // Create a buffer to receive the minified string. Make sure that there is enough room (length bytes). std::unique_ptr buffer{new char[length]}; size_t new_length{}; // It will receive the minified length. @@ -278,7 +278,7 @@ The simdjson library has fast functions to validate UTF-8 strings. They are many ``` const char * some_string = "[ 1, 2, 3, 4] "; - size_t length = strlen(some_string); + size_t length = std::strlen(some_string); bool is_ok = simdjson::validate_utf8(some_string, length); ``` diff --git a/doc/performance.md b/doc/performance.md index 67a28013..20292a4f 100644 --- a/doc/performance.md +++ b/doc/performance.md @@ -12,6 +12,8 @@ are still some scenarios where tuning can enhance performance. 
* [Visual Studio](#visual-studio) * [Downclocking](#downclocking) * [Best Use of the DOM API](#best-use-of-the-dom-api) +* [Padding and Temporary Copies](#padding-and-temporary-copies) + Reusing the parser for maximum efficiency ----------------------------------------- @@ -174,3 +176,25 @@ Best Use of the DOM API The simdjson API provides access to the JSON DOM (document-object-model) content as a tree of `dom::element` instances, each representing an object, an array or an atomic type (null, true, false, number). These `dom::element` instances are lightweight objects (e.g., spanning 16 bytes) and it might be advantageous to pass them by value, as opposed to passing them by reference or by pointer. +Padding and Temporary Copies +-------------- + +The simdjson function `parser.parse` reads data from a padded buffer, containing SIMDJSON_PADDING extra bytes added at the end. +If you are passing a `padded_string` to `parser.parse` or loading the JSON directly from +disk (`parser.load`), padding is automatically handled. +When calling `parser.parse` on a pointer (e.g., `parser.parse(mystring, mylength)`) a temporary copy is made by default with adequate padding and you, again, do not need to be concerned with padding. + +Some users may not be able to use our `padded_string` class or to load the data directly from disk (`parser.load`). They may need to pass data pointers to the library. If these users wish to avoid temporary copies and corresponding temporary memory allocations, they may want to call `parser.parse` with the `realloc_if_needed` parameter set to false (e.g., `parser.parse(mystring, mylength, false)`). In such cases, they need to ensure that there are at least SIMDJSON_PADDING extra bytes at the end that can be safely accessed and read. They do not need to initialize the padded bytes to any value in particular.
The following example is safe: + + +```C++ +const char *json = R"({"key":"value"})"; +const size_t json_len = std::strlen(json); +std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]}; +std::memcpy(padded_json_copy.get(), json, json_len); +std::memset(padded_json_copy.get() + json_len, 0, SIMDJSON_PADDING); +simdjson::dom::parser parser; +simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false); +``` + +Setting the `realloc_if_needed` parameter to false in this manner may lead to better performance, but it requires that the user take on more responsibility: the simdjson library cannot verify that the input buffer was padded. \ No newline at end of file diff --git a/include/simdjson/dom/document-inl.h b/include/simdjson/dom/document-inl.h index 2cfa1d69..0725ce82 100644 --- a/include/simdjson/dom/document-inl.h +++ b/include/simdjson/dom/document-inl.h @@ -66,7 +66,7 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept { switch (type) { case '"': // we have a string os << "string \""; - memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); + std::memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); os << internal::escape_json_string(std::string_view( (const char *)(string_buf.get() + payload + sizeof(uint32_t)), string_length @@ -92,7 +92,7 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept { return false; } double answer; - memcpy(&answer, &tape[++tape_idx], sizeof(answer)); + std::memcpy(&answer, &tape[++tape_idx], sizeof(answer)); os << answer << '\n'; break; case 'n': // we have a null diff --git a/include/simdjson/dom/parsedjson_iterator-inl.h b/include/simdjson/dom/parsedjson_iterator-inl.h index 66833f9a..5133c976 100644 --- a/include/simdjson/dom/parsedjson_iterator-inl.h +++ b/include/simdjson/dom/parsedjson_iterator-inl.h @@ -252,7 +252,7 @@ dom::parser::Iterator::Iterator( current_val(o.current_val) { depth_index = new scopeindex_t[max_depth+1]; -
memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0])); + std::memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0])); } dom::parser::Iterator::~Iterator() noexcept { diff --git a/include/simdjson/dom/parsedjson_iterator.h b/include/simdjson/dom/parsedjson_iterator.h index 2a0f32be..5656595e 100644 --- a/include/simdjson/dom/parsedjson_iterator.h +++ b/include/simdjson/dom/parsedjson_iterator.h @@ -78,7 +78,7 @@ public: // return the length of the string in bytes inline uint32_t get_string_length() const { uint32_t answer; - memcpy(&answer, + std::memcpy(&answer, reinterpret_cast(doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK)), sizeof(uint32_t)); @@ -93,7 +93,7 @@ public: // case of error } double answer; - memcpy(&answer, &doc.tape[location + 1], sizeof(answer)); + std::memcpy(&answer, &doc.tape[location + 1], sizeof(answer)); return answer; } diff --git a/include/simdjson/dom/parser-inl.h b/include/simdjson/dom/parser-inl.h index 4e169806..8121cb72 100644 --- a/include/simdjson/dom/parser-inl.h +++ b/include/simdjson/dom/parser-inl.h @@ -98,7 +98,7 @@ inline simdjson_result parser::parse(const uint8_t *buf, size_t len, bo if (realloc_if_needed) { tmp_buf.reset((uint8_t *)internal::allocate_padded_buffer(len)); if (tmp_buf.get() == nullptr) { return MEMALLOC; } - memcpy((void *)tmp_buf.get(), buf, len); + std::memcpy((void *)tmp_buf.get(), buf, len); } _error = implementation->parse(realloc_if_needed ? tmp_buf.get() : buf, len, doc); if (_error) { return _error; } diff --git a/include/simdjson/dom/parser.h b/include/simdjson/dom/parser.h index 996f4929..2e73c2a3 100644 --- a/include/simdjson/dom/parser.h +++ b/include/simdjson/dom/parser.h @@ -114,8 +114,30 @@ public: * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. 
* - * If realloc_if_needed is true, it is assumed that the buffer does *not* have enough padding, - * and it is copied into an enlarged temporary buffer before parsing. + * If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding, + * and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe: + * + * const char *json = R"({"key":"value"})"; + * const size_t json_len = std::strlen(json); + * simdjson::dom::parser parser; + * simdjson::dom::element element = parser.parse(json, json_len); + * + * If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)), + * you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end. + * The benefit of setting realloc_if_needed to false is that you avoid a temporary + * memory allocation and a copy. + * + * The padded bytes may be read. It is not important how you initialize + * these bytes though we recommend a sensible default like null character values or spaces. + * For example, the following low-level code is safe: + * + * const char *json = R"({"key":"value"})"; + * const size_t json_len = std::strlen(json); + * std::unique_ptr padded_json_copy{new char[json_len + SIMDJSON_PADDING]}; + * std::memcpy(padded_json_copy.get(), json, json_len); + * std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING); + * simdjson::dom::parser parser; + * simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false); * * ### Parser Capacity * diff --git a/include/simdjson/error.h b/include/simdjson/error.h index 5f6e5dfa..9f9ee06b 100644 --- a/include/simdjson/error.h +++ b/include/simdjson/error.h @@ -43,7 +43,7 @@ enum error_code { * * dom::parser parser; * dom::element doc; - * auto error = parser.parse("foo").get(doc); + * auto error = parser.parse("foo",3).get(doc); * if (error) { printf("Error: %s\n", error_message(error)); } * * @return The error message. 
diff --git a/include/simdjson/internal/tape_ref-inl.h b/include/simdjson/internal/tape_ref-inl.h index 7a20ad74..9aaa06b1 100644 --- a/include/simdjson/internal/tape_ref-inl.h +++ b/include/simdjson/internal/tape_ref-inl.h @@ -81,14 +81,14 @@ simdjson_really_inline T tape_ref::next_tape_value() const noexcept { // It is not generally safe. It is safer, and often faster to rely // on memcpy. Yes, it is uglier, but it is also encapsulated. T x; - memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t)); + std::memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t)); return x; } simdjson_really_inline uint32_t internal::tape_ref::get_string_length() const noexcept { size_t string_buf_index = size_t(tape_value()); uint32_t len; - memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); + std::memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); return len; } diff --git a/include/simdjson/padded_string-inl.h b/include/simdjson/padded_string-inl.h index 4543d2c9..49326dd0 100644 --- a/include/simdjson/padded_string-inl.h +++ b/include/simdjson/padded_string-inl.h @@ -27,7 +27,7 @@ inline char *allocate_padded_buffer(size_t length) noexcept { // We write zeroes in the padded region to avoid having uninitized // garbage. If nothing else, garbage getting read might trigger a // warning in a memory checking. 
- memset(padded_buffer + length, 0, totalpaddedlength - length); + std::memset(padded_buffer + length, 0, totalpaddedlength - length); return padded_buffer; } // allocate_padded_buffer() @@ -43,7 +43,7 @@ inline padded_string::padded_string(size_t length) noexcept inline padded_string::padded_string(const char *data, size_t length) noexcept : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { if ((data != nullptr) and (data_ptr != nullptr)) { - memcpy(data_ptr, data, length); + std::memcpy(data_ptr, data, length); data_ptr[length] = '\0'; // easier when you need a c_str } } @@ -51,7 +51,7 @@ inline padded_string::padded_string(const char *data, size_t length) noexcept inline padded_string::padded_string(const std::string & str_ ) noexcept : viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) { if (data_ptr != nullptr) { - memcpy(data_ptr, str_.data(), str_.size()); + std::memcpy(data_ptr, str_.data(), str_.size()); data_ptr[str_.size()] = '\0'; // easier when you need a c_str } } @@ -59,7 +59,7 @@ inline padded_string::padded_string(const std::string & str_ ) noexcept inline padded_string::padded_string(std::string_view sv_) noexcept : viable_size(sv_.size()), data_ptr(internal::allocate_padded_buffer(sv_.size())) { if (data_ptr != nullptr) { - memcpy(data_ptr, sv_.data(), sv_.size()); + std::memcpy(data_ptr, sv_.data(), sv_.size()); data_ptr[sv_.size()] = '\0'; // easier when you need a c_str } } diff --git a/src/generic/stage1/buf_block_reader.h b/src/generic/stage1/buf_block_reader.h index 9dd6e88f..da4038d5 100644 --- a/src/generic/stage1/buf_block_reader.h +++ b/src/generic/stage1/buf_block_reader.h @@ -76,8 +76,8 @@ simdjson_really_inline const uint8_t *buf_block_reader::full_block() template simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - memset(dst, 0x20, STEP_SIZE); // 
memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. - memcpy(dst, buf + idx, len - idx); + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); return len - idx; } diff --git a/src/generic/stage2/tape_builder.h b/src/generic/stage2/tape_builder.h index f50556f1..a4c87f2d 100644 --- a/src/generic/stage2/tape_builder.h +++ b/src/generic/stage2/tape_builder.h @@ -179,8 +179,8 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_root_ // uint8_t *copy = static_cast(malloc(iter.remaining_len() + SIMDJSON_PADDING)); if (copy == nullptr) { return MEMALLOC; } - memcpy(copy, value, iter.remaining_len()); - memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING); + std::memcpy(copy, value, iter.remaining_len()); + std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING); error_code error = visit_number(iter, copy); free(copy); return error; diff --git a/tests/basictests.cpp b/tests/basictests.cpp index d9b22714..7f39023a 100644 --- a/tests/basictests.cpp +++ b/tests/basictests.cpp @@ -255,10 +255,10 @@ namespace parse_api_tests { uint64_t count = 0; constexpr const int BATCH_SIZE = 128; uint8_t empty_batches_ndjson[BATCH_SIZE*16+SIMDJSON_PADDING]; - memset(&empty_batches_ndjson[0], ' ', BATCH_SIZE*16+SIMDJSON_PADDING); - memcpy(&empty_batches_ndjson[BATCH_SIZE*3+2], "1", 1); - memcpy(&empty_batches_ndjson[BATCH_SIZE*10+4], "2", 1); - memcpy(&empty_batches_ndjson[BATCH_SIZE*11+6], "3", 1); + std::memset(&empty_batches_ndjson[0], ' ', BATCH_SIZE*16+SIMDJSON_PADDING); + std::memcpy(&empty_batches_ndjson[BATCH_SIZE*3+2], "1", 1); + std::memcpy(&empty_batches_ndjson[BATCH_SIZE*10+4], "2", 1); + std::memcpy(&empty_batches_ndjson[BATCH_SIZE*11+6], "3", 1); simdjson::dom::document_stream stream; ASSERT_SUCCESS( parser.parse_many(empty_batches_ndjson, BATCH_SIZE*16).get(stream) ); for (auto doc : stream) { 
diff --git a/tests/errortests.cpp b/tests/errortests.cpp index 54cac920..ca3532e0 100644 --- a/tests/errortests.cpp +++ b/tests/errortests.cpp @@ -150,7 +150,7 @@ namespace adversarial { bool number_overrun_at_root() { TEST_START(); constexpr const char *json = "1" PADDING_FILLED_WITH_NUMBERS ","; - constexpr size_t len = 1; // strlen("1"); + constexpr size_t len = 1; // std::strlen("1"); dom::parser parser; uint64_t foo; @@ -161,7 +161,7 @@ namespace adversarial { bool number_overrun_in_array() { TEST_START(); constexpr const char *json = "[1" PADDING_FILLED_WITH_NUMBERS "]"; - constexpr size_t len = 2; // strlen("[1"); + constexpr size_t len = 2; // std::strlen("[1"); dom::parser parser; uint64_t foo; @@ -171,7 +171,7 @@ namespace adversarial { bool number_overrun_in_object() { TEST_START(); constexpr const char *json = "{\"key\":1" PADDING_FILLED_WITH_NUMBERS "}"; - constexpr size_t len = 8; // strlen("{\"key\":1"); + constexpr size_t len = 8; // std::strlen("{\"key\":1"); dom::parser parser; uint64_t foo; @@ -179,7 +179,7 @@ namespace adversarial { TEST_SUCCEED(); } bool run() { - static_assert(33 > SIMDJSON_PADDING, "corruption test doesn't have enough padding"); // 33 = strlen(PADDING_FILLED_WITH_NUMBERS) + static_assert(33 > SIMDJSON_PADDING, "corruption test doesn't have enough padding"); // 33 = std::strlen(PADDING_FILLED_WITH_NUMBERS) return true && number_overrun_at_root() && number_overrun_in_array() diff --git a/tests/jsoncheck.cpp b/tests/jsoncheck.cpp index 4f8fa7a4..40e9073c 100644 --- a/tests/jsoncheck.cpp +++ b/tests/jsoncheck.cpp @@ -23,7 +23,7 @@ static bool has_extension(const char *filename, const char *extension) { } bool starts_with(const char *pre, const char *str) { - size_t len_pre = strlen(pre), len_str = strlen(str); + size_t len_pre = std::strlen(pre), len_str = std::strlen(str); return len_str < len_pre ? 
false : strncmp(pre, str, len_pre) == 0; } @@ -34,7 +34,7 @@ bool contains(const char *pre, const char *str) { bool validate(const char *dirname) { bool everything_fine = true; const char *extension = ".json"; - size_t dirlen = strlen(dirname); + size_t dirlen = std::strlen(dirname); struct dirent **entry_list; int c = scandir(dirname, &entry_list, nullptr, alphasort); if (c < 0) { @@ -56,7 +56,7 @@ bool validate(const char *dirname) { if (has_extension(name, extension)) { printf("validating: file %s ", name); fflush(nullptr); - size_t namelen = strlen(name); + size_t namelen = std::strlen(name); size_t fullpathlen = dirlen + 1 + namelen + 1; char *fullpath = static_cast(malloc(fullpathlen)); snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name); diff --git a/tests/minefieldcheck.cpp b/tests/minefieldcheck.cpp index 39e18927..469dc768 100644 --- a/tests/minefieldcheck.cpp +++ b/tests/minefieldcheck.cpp @@ -21,7 +21,7 @@ static bool has_extension(const char *filename, const char *extension) { } bool starts_with(const char *pre, const char *str) { - size_t len_pre = strlen(pre), len_str = strlen(str); + size_t len_pre = std::strlen(pre), len_str = std::strlen(str); return len_str < len_pre ? 
false : strncmp(pre, str, len_pre) == 0; } @@ -32,7 +32,7 @@ bool contains(const char *pre, const char *str) { bool validate_minefield(const char *dirname) { bool everything_fine = true; const char *extension = ".json"; - size_t dirlen = strlen(dirname); + size_t dirlen = std::strlen(dirname); struct dirent **entry_list; int c = scandir(dirname, &entry_list, nullptr, alphasort); if (c < 0) { @@ -54,7 +54,7 @@ bool validate_minefield(const char *dirname) { if (has_extension(name, extension)) { printf("validating: file %s ", name); fflush(nullptr); - size_t namelen = strlen(name); + size_t namelen = std::strlen(name); size_t fullpathlen = dirlen + 1 + namelen + 1; char *fullpath = static_cast(malloc(fullpathlen)); snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name); diff --git a/tests/numberparsingcheck.cpp b/tests/numberparsingcheck.cpp index cd7bbbcc..73c3e136 100644 --- a/tests/numberparsingcheck.cpp +++ b/tests/numberparsingcheck.cpp @@ -54,7 +54,7 @@ size_t invalid_count; const char *really_bad[] = {"013}", "0x14", "0e]", "0e+]", "0e+-1]"}; bool starts_with(const char *pre, const char *str) { - size_t lenpre = strlen(pre); + size_t lenpre = std::strlen(pre); return strncmp(pre, str, lenpre) == 0; } @@ -168,7 +168,7 @@ bool validate(const char *dirname) { parse_error = 0; size_t total_count = 0; const char *extension = ".json"; - size_t dirlen = strlen(dirname); + size_t dirlen = std::strlen(dirname); struct dirent **entry_list; int c = scandir(dirname, &entry_list, 0, alphasort); if (c < 0) { @@ -183,7 +183,7 @@ bool validate(const char *dirname) { for (int i = 0; i < c; i++) { const char *name = entry_list[i]->d_name; if (has_extension(name, extension)) { - size_t filelen = strlen(name); + size_t filelen = std::strlen(name); fullpath = (char *)malloc(dirlen + filelen + 1 + 1); strcpy(fullpath, dirname); if (needsep) { diff --git a/tests/parse_many_test.cpp b/tests/parse_many_test.cpp index 8add8328..a4d28986 100644 --- 
a/tests/parse_many_test.cpp +++ b/tests/parse_many_test.cpp @@ -22,7 +22,7 @@ static bool has_extension(const char *filename, const char *extension) { } bool starts_with(const char *pre, const char *str) { - size_t len_pre = strlen(pre), len_str = strlen(str); + size_t len_pre = std::strlen(pre), len_str = std::strlen(str); return len_str < len_pre ? false : strncmp(pre, str, len_pre) == 0; } @@ -36,7 +36,7 @@ bool validate(const char *dirname) { const char *extension2 = ".jsonl"; const char *extension3 = ".json"; // bad json files shoud fail - size_t dirlen = strlen(dirname); + size_t dirlen = std::strlen(dirname); struct dirent **entry_list; int c = scandir(dirname, &entry_list, nullptr, alphasort); if (c < 0) { @@ -63,7 +63,7 @@ bool validate(const char *dirname) { /* Finding the file path */ printf("validating: file %s ", name); fflush(nullptr); - size_t namelen = strlen(name); + size_t namelen = std::strlen(name); size_t fullpathlen = dirlen + 1 + namelen + 1; char *fullpath = static_cast(malloc(fullpathlen)); snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? 
"/" : "", name); diff --git a/tests/pointercheck.cpp b/tests/pointercheck.cpp index 6af52d74..861f2153 100644 --- a/tests/pointercheck.cpp +++ b/tests/pointercheck.cpp @@ -179,7 +179,7 @@ bool issue1142() { ASSERT_EQUAL(std::string(R"([])"), simdjson::minify(example3)); const char * input_array = "[]"; - size_t input_length = strlen(input_array); + size_t input_length = std::strlen(input_array); auto element4 = parser.parse(input_array, input_length).at_pointer("");; ASSERT_EQUAL(std::string(R"([])"), simdjson::minify(element4)); diff --git a/tests/readme_examples.cpp b/tests/readme_examples.cpp index 922bead2..04a580d3 100644 --- a/tests/readme_examples.cpp +++ b/tests/readme_examples.cpp @@ -261,7 +261,7 @@ SIMDJSON_POP_DISABLE_WARNINGS void minify() { const char * some_string = "[ 1, 2, 3, 4] "; - size_t length = strlen(some_string); + size_t length = std::strlen(some_string); std::unique_ptr buffer{new char[length]}; size_t new_length{}; auto error = simdjson::minify(some_string, length, buffer.get(), new_length); @@ -270,7 +270,7 @@ void minify() { abort(); } else { const char * expected_string = "[1,2,3,4]"; - size_t expected_length = strlen(expected_string); + size_t expected_length = std::strlen(expected_string); if(expected_length != new_length) { std::cerr << "mismatched length (error) " << std::endl; abort(); @@ -286,14 +286,14 @@ void minify() { bool is_correct() { const char * some_string = "[ 1, 2, 3, 4] "; - size_t length = strlen(some_string); + size_t length = std::strlen(some_string); bool is_ok = simdjson::validate_utf8(some_string, length); return is_ok; } bool is_correct_string_view() { const char * some_string = "[ 1, 2, 3, 4] "; - size_t length = strlen(some_string); + size_t length = std::strlen(some_string); std::string_view v(some_string, length); bool is_ok = simdjson::validate_utf8(v); return is_ok; @@ -305,6 +305,31 @@ bool is_correct_string() { return is_ok; } +void parse_documentation() { + const char *json = R"({"key":"value"})"; + 
const size_t json_len = std::strlen(json); + simdjson::dom::parser parser; + simdjson::dom::element element = parser.parse(json, json_len); + // Next line is to avoid unused warning. + (void)element; +} + + +void parse_documentation_lowlevel() { + // Such low-level code is not generally recommended. Please + // see parse_documentation() instead. + // Motivation: https://github.com/simdjson/simdjson/issues/1175 + const char *json = R"({"key":"value"})"; + const size_t json_len = std::strlen(json); + std::unique_ptr padded_json_copy{new char[json_len + SIMDJSON_PADDING]}; + std::memcpy(padded_json_copy.get(), json, json_len); + std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING); + simdjson::dom::parser parser; + simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false); + // Next line is to avoid unused warning. + (void)element; +} + int main() { basics_dom_1(); basics_dom_2(); diff --git a/tests/stringparsingcheck.cpp b/tests/stringparsingcheck.cpp index 11976ae1..18bc7219 100644 --- a/tests/stringparsingcheck.cpp +++ b/tests/stringparsingcheck.cpp @@ -305,7 +305,7 @@ static bool has_extension(const char *filename, const char *extension) { } bool starts_with(const char *pre, const char *str) { - size_t lenpre = strlen(pre), lenstr = strlen(str); + size_t lenpre = std::strlen(pre), lenstr = std::strlen(str); return lenstr < lenpre ? 
false : strncmp(pre, str, lenpre) == 0; } @@ -313,7 +313,7 @@ bool validate(const char *dirname) { size_t total_strings = 0; probable_bug = false; const char *extension = ".json"; - size_t dirlen = strlen(dirname); + size_t dirlen = std::strlen(dirname); struct dirent **entry_list; int c = scandir(dirname, &entry_list, 0, alphasort); if (c < 0) { @@ -328,7 +328,7 @@ bool validate(const char *dirname) { for (int i = 0; i < c; i++) { const char *name = entry_list[i]->d_name; if (has_extension(name, extension)) { - size_t filelen = strlen(name); + size_t filelen = std::strlen(name); fullpath = (char *)malloc(dirlen + filelen + 1 + 1); strcpy(fullpath, dirname); if (needsep) { diff --git a/tests/unicode_tests.cpp b/tests/unicode_tests.cpp index d055d9ed..e9c9c282 100644 --- a/tests/unicode_tests.cpp +++ b/tests/unicode_tests.cpp @@ -224,14 +224,14 @@ void test() { "\x91\x85\x95\x9e", "\x6c\x02\x8e\x18"}; for (size_t i = 0; i < 8; i++) { - size_t len = strlen(goodsequences[i]); + size_t len = std::strlen(goodsequences[i]); if (!simdjson::validate_utf8(goodsequences[i], len)) { printf("bug goodsequences[%zu]\n", i); abort(); } } for (size_t i = 0; i < 26; i++) { - size_t len = strlen(badsequences[i]); + size_t len = std::strlen(badsequences[i]); if (simdjson::validate_utf8(badsequences[i], len)) { printf("bug lookup2 badsequences[%zu]\n", i); abort();