Improve documentation on padding

- Improves and clarifies the documentation on padding. - Use std:: prefix for memcpy, strlen etc. Related to issues #1175 and #1178
2020-09-23 03:07:14 -04:00 · 2020-09-23 03:07:14 -04:00 · f410213003
parent 19cb5d57db
commit f410213003
25 changed files with 129 additions and 58 deletions
--- a/benchmark/linux/linux-perf-events.h
+++ b/benchmark/linux/linux-perf-events.h
@ -8,7 +8,7 @@
 #include <unistd.h>           // for syscall

 #include <cerrno>  // for errno
-#include <cstring> // for memset
+#include <cstring> // for std::memset
 #include <stdexcept>

 #include <iostream>
@ -24,7 +24,7 @@ template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {

 public:
  explicit LinuxEvents(std::vector<int> config_vec) : fd(0), working(true) {
-    memset(&attribs, 0, sizeof(attribs));
+    std::memset(&attribs, 0, sizeof(attribs));
    attribs.type = TYPE;
    attribs.size = sizeof(attribs);
    attribs.disabled = 1;
--- a/benchmark/twitter/sax_tweet_reader_visitor.h
+++ b/benchmark/twitter/sax_tweet_reader_visitor.h
@ -296,8 +296,8 @@ simdjson_really_inline void sax_tweet_reader_visitor::field_lookup::neg(const ch
 }

 sax_tweet_reader_visitor::field_lookup::field_lookup() {
-  add("\"statuses\"", strlen("\"statuses\""), containers::top_object, field_type::array, 0); // { "statuses": [...]
-  #define TWEET_FIELD(KEY, TYPE) add("\"" #KEY "\"", strlen("\"" #KEY "\""), containers::tweet, TYPE, offsetof(tweet, KEY));
+  add("\"statuses\"", std::strlen("\"statuses\""), containers::top_object, field_type::array, 0); // { "statuses": [...]
+  #define TWEET_FIELD(KEY, TYPE) add("\"" #KEY "\"", std::strlen("\"" #KEY "\""), containers::tweet, TYPE, offsetof(tweet, KEY));
  TWEET_FIELD(id, field_type::unsigned_integer);
  TWEET_FIELD(in_reply_to_status_id, field_type::nullable_unsigned_integer);
  TWEET_FIELD(retweet_count, field_type::unsigned_integer);
@ -306,7 +306,7 @@ sax_tweet_reader_visitor::field_lookup::field_lookup() {
  TWEET_FIELD(created_at, field_type::string);
  TWEET_FIELD(user, field_type::object)
  #undef TWEET_FIELD
-  #define USER_FIELD(KEY, TYPE) add("\"" #KEY "\"", strlen("\"" #KEY "\""), containers::user, TYPE, offsetof(tweet, user)+offsetof(twitter_user, KEY));
+  #define USER_FIELD(KEY, TYPE) add("\"" #KEY "\"", std::strlen("\"" #KEY "\""), containers::user, TYPE, offsetof(tweet, user)+offsetof(twitter_user, KEY));
  USER_FIELD(id, field_type::unsigned_integer);
  USER_FIELD(screen_name, field_type::string);
  #undef USER_FIELD
--- a/doc/basics.md
+++ b/doc/basics.md
@ -278,7 +278,7 @@ In some cases, you may have valid JSON strings that you do not wish to parse but
  // Starts with a valid JSON document as a string.
  // It does not have to be null-terminated.
  const char * some_string = "[ 1, 2, 3, 4] ";
-  size_t length = strlen(some_string);
+  size_t length = std::strlen(some_string);
  // Create a buffer to receive the minified string. Make sure that there is enough room (length bytes).  
  std::unique_ptr<char[]> buffer{new char[length]};
  size_t new_length{}; // It will receive the minified length.
@ -296,7 +296,7 @@ The simdjson library has fast functions to validate UTF-8 strings. They are many

 ```C++
  const char * some_string = "[ 1, 2, 3, 4] ";
-  size_t length = strlen(some_string);
+  size_t length = std::strlen(some_string);
  bool is_ok = simdjson::validate_utf8(some_string, length);
 ```

--- a/doc/basics_doxygen.md
+++ b/doc/basics_doxygen.md
@ -260,7 +260,7 @@ In some cases, you may have valid JSON strings that you do not wish to parse but
  // Starts with a valid JSON document as a string.
  // It does not have to be null-terminated.
  const char * some_string = "[ 1, 2, 3, 4] ";
-  size_t length = strlen(some_string);
+  size_t length = std::strlen(some_string);
  // Create a buffer to receive the minified string. Make sure that there is enough room (length bytes).
  std::unique_ptr<char[]> buffer{new char[length]};
  size_t new_length{}; // It will receive the minified length.
@ -278,7 +278,7 @@ The simdjson library has fast functions to validate UTF-8 strings. They are many

 ```
  const char * some_string = "[ 1, 2, 3, 4] ";
-  size_t length = strlen(some_string);
+  size_t length = std::strlen(some_string);
  bool is_ok = simdjson::validate_utf8(some_string, length);
 ```

--- a/doc/performance.md
+++ b/doc/performance.md
@ -12,6 +12,8 @@ are still some scenarios where tuning can enhance performance.
 * [Visual Studio](#visual-studio)
 * [Downclocking](#downclocking)
 * [Best Use of the DOM API](#best-use-of-the-dom-api)
+* [Padding and Temporary Copies](#padding-and-temporary-copies)
+

 Reusing the parser for maximum efficiency
 -----------------------------------------
@ -174,3 +176,25 @@ Best Use of the DOM API

 The simdjson API provides access to the JSON DOM (document-object-model) content as a tree of `dom::element` instances, each representing an object, an array or an atomic type (null, true, false, number). These `dom::element` instances are lightweight objects (e.g., spanning 16 bytes) and it might be advantageous to pass them by value, as opposed to passing them by reference or by pointer.

+Padding and Temporary Copies
+--------------
+
+The simdjson function `parser.parse` reads data from a padded buffer, that is, a buffer with `SIMDJSON_PADDING` extra bytes at the end.
+If you are passing a `padded_string` to `parser.parse` or loading the JSON directly from
+disk (`parser.load`), padding is handled automatically.
+When calling `parser.parse` on a pointer (e.g., `parser.parse(mystring, mylength)`), a temporary copy with adequate padding is made by default, so here too you do not need to be concerned with padding.
+
+Some users may not be able to use our `padded_string` class or to load the data directly from disk (`parser.load`). They may need to pass data pointers to the library. If these users wish to avoid temporary copies and the corresponding temporary memory allocations, they may want to call `parser.parse` with the `realloc_if_needed` parameter set to false (e.g., `parser.parse(mystring, mylength, false)`). In such cases, they need to ensure that there are at least `SIMDJSON_PADDING` extra bytes at the end of the buffer that can be safely accessed and read. They do not need to initialize the padding bytes to any particular value. The following example is safe:
+
+
+```C++
+const char *json      = R"({"key":"value"})";
+const size_t json_len = std::strlen(json);
+std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]};
+std::memcpy(padded_json_copy.get(), json, json_len);
+std::memset(padded_json_copy.get() + json_len, 0, SIMDJSON_PADDING);
+simdjson::dom::parser parser;
+simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false);
+```
+
+Setting the `realloc_if_needed` parameter to false in this manner may lead to better performance, but it requires the user to take on more responsibility: the simdjson library cannot verify that the input buffer was padded.
--- a/include/simdjson/dom/document-inl.h
+++ b/include/simdjson/dom/document-inl.h
@ -66,7 +66,7 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept {
    switch (type) {
    case '"': // we have a string
      os << "string \"";
-      memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t));
+      std::memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t));
      os << internal::escape_json_string(std::string_view(
        (const char *)(string_buf.get() + payload + sizeof(uint32_t)),
        string_length
@ -92,7 +92,7 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept {
        return false;
      }
      double answer;
-      memcpy(&answer, &tape[++tape_idx], sizeof(answer));
+      std::memcpy(&answer, &tape[++tape_idx], sizeof(answer));
      os << answer << '\n';
      break;
    case 'n': // we have a null
--- a/include/simdjson/dom/parsedjson_iterator-inl.h
+++ b/include/simdjson/dom/parsedjson_iterator-inl.h
@ -252,7 +252,7 @@ dom::parser::Iterator::Iterator(
    current_val(o.current_val)
 {
  depth_index = new scopeindex_t[max_depth+1];
-  memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0]));
+  std::memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0]));
 }

 dom::parser::Iterator::~Iterator() noexcept {
--- a/include/simdjson/dom/parsedjson_iterator.h
+++ b/include/simdjson/dom/parsedjson_iterator.h
@ -78,7 +78,7 @@ public:
  // return the length of the string in bytes
  inline uint32_t get_string_length() const {
      uint32_t answer;
-      memcpy(&answer,
+      std::memcpy(&answer,
          reinterpret_cast<const char *>(doc.string_buf.get() +
                                          (current_val & internal::JSON_VALUE_MASK)),
          sizeof(uint32_t));
@ -93,7 +93,7 @@ public:
                                                      // case of error
      }
      double answer;
-      memcpy(&answer, &doc.tape[location + 1], sizeof(answer));
+      std::memcpy(&answer, &doc.tape[location + 1], sizeof(answer));
      return answer;
  }

--- a/include/simdjson/dom/parser-inl.h
+++ b/include/simdjson/dom/parser-inl.h
@ -98,7 +98,7 @@ inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bo
  if (realloc_if_needed) {
    tmp_buf.reset((uint8_t *)internal::allocate_padded_buffer(len));
    if (tmp_buf.get() == nullptr) { return MEMALLOC; }
-    memcpy((void *)tmp_buf.get(), buf, len);
+    std::memcpy((void *)tmp_buf.get(), buf, len);
  }
  _error = implementation->parse(realloc_if_needed ? tmp_buf.get() : buf, len, doc);
  if (_error) { return _error; }
--- a/include/simdjson/dom/parser.h
+++ b/include/simdjson/dom/parser.h
@ -114,8 +114,30 @@ public:
   * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
   * those bytes are initialized to, as long as they are allocated.
   *
-   * If realloc_if_needed is true, it is assumed that the buffer does *not* have enough padding,
-   * and it is copied into an enlarged temporary buffer before parsing.
+   * If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding,
+   * and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe: 
+   * 
+   *   const char *json      = R"({"key":"value"})";
+   *   const size_t json_len = std::strlen(json);
+   *   simdjson::dom::parser parser;
+   *   simdjson::dom::element element = parser.parse(json, json_len);
+   * 
+   * If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)), 
+   * you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end.
+   * The benefit of setting realloc_if_needed to false is that you avoid a temporary
+   * memory allocation and a copy.
+   * 
+   * The padding bytes may be read. It is not important how you initialize
+   * these bytes, though we recommend a sensible default such as null characters or spaces.
+   * For example, the following low-level code is safe:
+   * 
+   *   const char *json      = R"({"key":"value"})";
+   *   const size_t json_len = std::strlen(json);
+   *   std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]};
+   *   std::memcpy(padded_json_copy.get(), json, json_len);
+   *   std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING);
+   *   simdjson::dom::parser parser;
+   *   simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false);
   *
   * ### Parser Capacity
   *
--- a/include/simdjson/error.h
+++ b/include/simdjson/error.h
@ -43,7 +43,7 @@ enum error_code {
 *
 *   dom::parser parser;
 *   dom::element doc;
- *   auto error = parser.parse("foo").get(doc);
+ *   auto error = parser.parse("foo",3).get(doc);
 *   if (error) { printf("Error: %s\n", error_message(error)); }
 *
 * @return The error message.
--- a/include/simdjson/internal/tape_ref-inl.h
+++ b/include/simdjson/internal/tape_ref-inl.h
@ -81,14 +81,14 @@ simdjson_really_inline T tape_ref::next_tape_value() const noexcept {
  // It is not generally safe. It is safer, and often faster to rely
  // on memcpy. Yes, it is uglier, but it is also encapsulated.
  T x;
-  memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t));
+  std::memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t));
  return x;
 }

 simdjson_really_inline uint32_t internal::tape_ref::get_string_length() const noexcept {
  size_t string_buf_index = size_t(tape_value());
  uint32_t len;
-  memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
+  std::memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
  return len;
 }

--- a/include/simdjson/padded_string-inl.h
+++ b/include/simdjson/padded_string-inl.h
@ -27,7 +27,7 @@ inline char *allocate_padded_buffer(size_t length) noexcept {
  // We write zeroes in the padded region to avoid having uninitized 
  // garbage. If nothing else, garbage getting read might trigger a 
  // warning in a memory checking.
-  memset(padded_buffer + length, 0, totalpaddedlength - length);
+  std::memset(padded_buffer + length, 0, totalpaddedlength - length);
  return padded_buffer;
 } // allocate_padded_buffer()

@ -43,7 +43,7 @@ inline padded_string::padded_string(size_t length) noexcept
 inline padded_string::padded_string(const char *data, size_t length) noexcept
    : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) {
  if ((data != nullptr) and (data_ptr != nullptr)) {
-    memcpy(data_ptr, data, length);
+    std::memcpy(data_ptr, data, length);
    data_ptr[length] = '\0'; // easier when you need a c_str
  }
 }
@ -51,7 +51,7 @@ inline padded_string::padded_string(const char *data, size_t length) noexcept
 inline padded_string::padded_string(const std::string & str_ ) noexcept
    : viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) {
  if (data_ptr != nullptr) {
-    memcpy(data_ptr, str_.data(), str_.size());
+    std::memcpy(data_ptr, str_.data(), str_.size());
    data_ptr[str_.size()] = '\0'; // easier when you need a c_str
  }
 }
@ -59,7 +59,7 @@ inline padded_string::padded_string(const std::string & str_ ) noexcept
 inline padded_string::padded_string(std::string_view sv_) noexcept
    : viable_size(sv_.size()), data_ptr(internal::allocate_padded_buffer(sv_.size())) {
  if (data_ptr != nullptr) {
-    memcpy(data_ptr, sv_.data(), sv_.size());
+    std::memcpy(data_ptr, sv_.data(), sv_.size());
    data_ptr[sv_.size()] = '\0'; // easier when you need a c_str
  }
 }
--- a/src/generic/stage1/buf_block_reader.h
+++ b/src/generic/stage1/buf_block_reader.h
@ -76,8 +76,8 @@ simdjson_really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block()
 template<size_t STEP_SIZE>
 simdjson_really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
  if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers
-  memset(dst, 0x20, STEP_SIZE); // memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
-  memcpy(dst, buf + idx, len - idx);
+  std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
+  std::memcpy(dst, buf + idx, len - idx);
  return len - idx;
 }

--- a/src/generic/stage2/tape_builder.h
+++ b/src/generic/stage2/tape_builder.h
@ -179,8 +179,8 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_root_
  //
  uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING));
  if (copy == nullptr) { return MEMALLOC; }
-  memcpy(copy, value, iter.remaining_len());
-  memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING);
+  std::memcpy(copy, value, iter.remaining_len());
+  std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING);
  error_code error = visit_number(iter, copy);
  free(copy);
  return error;
--- a/tests/basictests.cpp
+++ b/tests/basictests.cpp
@ -255,10 +255,10 @@ namespace parse_api_tests {
    uint64_t count = 0;
    constexpr const int BATCH_SIZE = 128;
    uint8_t empty_batches_ndjson[BATCH_SIZE*16+SIMDJSON_PADDING];
-    memset(&empty_batches_ndjson[0], ' ', BATCH_SIZE*16+SIMDJSON_PADDING);
-    memcpy(&empty_batches_ndjson[BATCH_SIZE*3+2], "1", 1);
-    memcpy(&empty_batches_ndjson[BATCH_SIZE*10+4], "2", 1);
-    memcpy(&empty_batches_ndjson[BATCH_SIZE*11+6], "3", 1);
+    std::memset(&empty_batches_ndjson[0], ' ', BATCH_SIZE*16+SIMDJSON_PADDING);
+    std::memcpy(&empty_batches_ndjson[BATCH_SIZE*3+2], "1", 1);
+    std::memcpy(&empty_batches_ndjson[BATCH_SIZE*10+4], "2", 1);
+    std::memcpy(&empty_batches_ndjson[BATCH_SIZE*11+6], "3", 1);
    simdjson::dom::document_stream stream;
    ASSERT_SUCCESS( parser.parse_many(empty_batches_ndjson, BATCH_SIZE*16).get(stream) );
    for (auto doc : stream) {
--- a/tests/errortests.cpp
+++ b/tests/errortests.cpp
@ -150,7 +150,7 @@ namespace adversarial {
  bool number_overrun_at_root() {
    TEST_START();
    constexpr const char *json = "1" PADDING_FILLED_WITH_NUMBERS ",";
-    constexpr size_t len = 1; // strlen("1");
+    constexpr size_t len = 1; // std::strlen("1");

    dom::parser parser;
    uint64_t foo;
@ -161,7 +161,7 @@ namespace adversarial {
  bool number_overrun_in_array() {
    TEST_START();
    constexpr const char *json = "[1" PADDING_FILLED_WITH_NUMBERS "]";
-    constexpr size_t len = 2; // strlen("[1");
+    constexpr size_t len = 2; // std::strlen("[1");

    dom::parser parser;
    uint64_t foo;
@ -171,7 +171,7 @@ namespace adversarial {
  bool number_overrun_in_object() {
    TEST_START();
    constexpr const char *json = "{\"key\":1" PADDING_FILLED_WITH_NUMBERS "}";
-    constexpr size_t len = 8; // strlen("{\"key\":1");
+    constexpr size_t len = 8; // std::strlen("{\"key\":1");

    dom::parser parser;
    uint64_t foo;
@ -179,7 +179,7 @@ namespace adversarial {
    TEST_SUCCEED();
  }
  bool run() {
-    static_assert(33 > SIMDJSON_PADDING, "corruption test doesn't have enough padding"); // 33 = strlen(PADDING_FILLED_WITH_NUMBERS)
+    static_assert(33 > SIMDJSON_PADDING, "corruption test doesn't have enough padding"); // 33 = std::strlen(PADDING_FILLED_WITH_NUMBERS)
    return true
      && number_overrun_at_root()
      && number_overrun_in_array()
--- a/tests/jsoncheck.cpp
+++ b/tests/jsoncheck.cpp
@ -23,7 +23,7 @@ static bool has_extension(const char *filename, const char *extension) {
 }

 bool starts_with(const char *pre, const char *str) {
-  size_t len_pre = strlen(pre), len_str = strlen(str);
+  size_t len_pre = std::strlen(pre), len_str = std::strlen(str);
  return len_str < len_pre ? false : strncmp(pre, str, len_pre) == 0;
 }

@ -34,7 +34,7 @@ bool contains(const char *pre, const char *str) {
 bool validate(const char *dirname) {
  bool everything_fine = true;
  const char *extension = ".json";
-  size_t dirlen = strlen(dirname);
+  size_t dirlen = std::strlen(dirname);
  struct dirent **entry_list;
  int c = scandir(dirname, &entry_list, nullptr, alphasort);
  if (c < 0) {
@ -56,7 +56,7 @@ bool validate(const char *dirname) {
    if (has_extension(name, extension)) {
      printf("validating: file %s ", name);
      fflush(nullptr);
-      size_t namelen = strlen(name);
+      size_t namelen = std::strlen(name);
      size_t fullpathlen = dirlen + 1 + namelen + 1;
      char *fullpath = static_cast<char *>(malloc(fullpathlen));
      snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name);
--- a/tests/minefieldcheck.cpp
+++ b/tests/minefieldcheck.cpp
@ -21,7 +21,7 @@ static bool has_extension(const char *filename, const char *extension) {
 }

 bool starts_with(const char *pre, const char *str) {
-  size_t len_pre = strlen(pre), len_str = strlen(str);
+  size_t len_pre = std::strlen(pre), len_str = std::strlen(str);
  return len_str < len_pre ? false : strncmp(pre, str, len_pre) == 0;
 }

@ -32,7 +32,7 @@ bool contains(const char *pre, const char *str) {
 bool validate_minefield(const char *dirname) {
  bool everything_fine = true;
  const char *extension = ".json";
-  size_t dirlen = strlen(dirname);
+  size_t dirlen = std::strlen(dirname);
  struct dirent **entry_list;
  int c = scandir(dirname, &entry_list, nullptr, alphasort);
  if (c < 0) {
@ -54,7 +54,7 @@ bool validate_minefield(const char *dirname) {
    if (has_extension(name, extension)) {
      printf("validating: file %s ", name);
      fflush(nullptr);
-      size_t namelen = strlen(name);
+      size_t namelen = std::strlen(name);
      size_t fullpathlen = dirlen + 1 + namelen + 1;
      char *fullpath = static_cast<char *>(malloc(fullpathlen));
      snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name);
--- a/tests/numberparsingcheck.cpp
+++ b/tests/numberparsingcheck.cpp
@ -54,7 +54,7 @@ size_t invalid_count;
 const char *really_bad[] = {"013}", "0x14", "0e]", "0e+]", "0e+-1]"};

 bool starts_with(const char *pre, const char *str) {
-  size_t lenpre = strlen(pre);
+  size_t lenpre = std::strlen(pre);
  return strncmp(pre, str, lenpre) == 0;
 }

@ -168,7 +168,7 @@ bool validate(const char *dirname) {
  parse_error = 0;
  size_t total_count = 0;
  const char *extension = ".json";
-  size_t dirlen = strlen(dirname);
+  size_t dirlen = std::strlen(dirname);
  struct dirent **entry_list;
  int c = scandir(dirname, &entry_list, 0, alphasort);
  if (c < 0) {
@ -183,7 +183,7 @@ bool validate(const char *dirname) {
  for (int i = 0; i < c; i++) {
    const char *name = entry_list[i]->d_name;
    if (has_extension(name, extension)) {
-      size_t filelen = strlen(name);
+      size_t filelen = std::strlen(name);
      fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
      strcpy(fullpath, dirname);
      if (needsep) {
--- a/tests/parse_many_test.cpp
+++ b/tests/parse_many_test.cpp
@ -22,7 +22,7 @@ static bool has_extension(const char *filename, const char *extension) {
 }

 bool starts_with(const char *pre, const char *str) {
-    size_t len_pre = strlen(pre), len_str = strlen(str);
+    size_t len_pre = std::strlen(pre), len_str = std::strlen(str);
    return len_str < len_pre ? false : strncmp(pre, str, len_pre) == 0;
 }

@ -36,7 +36,7 @@ bool validate(const char *dirname) {
    const char *extension2 = ".jsonl";
    const char *extension3 = ".json"; // bad json files shoud fail

-    size_t dirlen = strlen(dirname);
+    size_t dirlen = std::strlen(dirname);
    struct dirent **entry_list;
    int c = scandir(dirname, &entry_list, nullptr, alphasort);
    if (c < 0) {
@ -63,7 +63,7 @@ bool validate(const char *dirname) {
            /*  Finding the file path  */
            printf("validating: file %s ", name);
            fflush(nullptr);
-            size_t namelen = strlen(name);
+            size_t namelen = std::strlen(name);
            size_t fullpathlen = dirlen + 1 + namelen + 1;
            char *fullpath = static_cast<char *>(malloc(fullpathlen));
            snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name);
--- a/tests/pointercheck.cpp
+++ b/tests/pointercheck.cpp
@ -179,7 +179,7 @@ bool issue1142() {
  ASSERT_EQUAL(std::string(R"([])"), simdjson::minify(example3));

  const char * input_array = "[]";
-  size_t input_length = strlen(input_array);
+  size_t input_length = std::strlen(input_array);
  auto element4 = parser.parse(input_array, input_length).at_pointer("");;
  ASSERT_EQUAL(std::string(R"([])"), simdjson::minify(element4));

--- a/tests/readme_examples.cpp
+++ b/tests/readme_examples.cpp
@ -261,7 +261,7 @@ SIMDJSON_POP_DISABLE_WARNINGS

 void minify() {
  const char * some_string = "[ 1, 2, 3, 4] ";
-  size_t length = strlen(some_string);
+  size_t length = std::strlen(some_string);
  std::unique_ptr<char[]> buffer{new char[length]};
  size_t new_length{};
  auto error = simdjson::minify(some_string, length, buffer.get(), new_length);
@ -270,7 +270,7 @@ void minify() {
    abort();
  } else {
    const char * expected_string = "[1,2,3,4]";
-    size_t expected_length = strlen(expected_string);
+    size_t expected_length = std::strlen(expected_string);
    if(expected_length != new_length) {
      std::cerr << "mismatched length (error) " << std::endl;
      abort();
@ -286,14 +286,14 @@ void minify() {

 bool is_correct() {
  const char * some_string = "[ 1, 2, 3, 4] ";
-  size_t length = strlen(some_string);
+  size_t length = std::strlen(some_string);
  bool is_ok = simdjson::validate_utf8(some_string, length);
  return is_ok;
 }

 bool is_correct_string_view() {
  const char * some_string = "[ 1, 2, 3, 4] ";
-  size_t length = strlen(some_string);
+  size_t length = std::strlen(some_string);
  std::string_view v(some_string, length);
  bool is_ok = simdjson::validate_utf8(v);
  return is_ok;
@ -305,6 +305,31 @@ bool is_correct_string() {
  return is_ok;
 }

+void parse_documentation() {
+  const char *json      = R"({"key":"value"})";
+  const size_t json_len = std::strlen(json);
+  simdjson::dom::parser parser;
+  simdjson::dom::element element = parser.parse(json, json_len);
+  // Next line is to avoid unused warning.
+  (void)element;
+}
+
+
+void parse_documentation_lowlevel() {
+  // Such low-level code is not generally recommended. Please
+  // see parse_documentation() instead.
+  // Motivation: https://github.com/simdjson/simdjson/issues/1175
+  const char *json      = R"({"key":"value"})";
+  const size_t json_len = std::strlen(json);
+  std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]};
+  std::memcpy(padded_json_copy.get(), json, json_len);
+  std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING);
+  simdjson::dom::parser parser;
+  simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false);
+  // Next line is to avoid unused warning.
+  (void)element;
+}
+
 int main() {
  basics_dom_1();
  basics_dom_2();
--- a/tests/stringparsingcheck.cpp
+++ b/tests/stringparsingcheck.cpp
@ -305,7 +305,7 @@ static bool has_extension(const char *filename, const char *extension) {
 }

 bool starts_with(const char *pre, const char *str) {
-  size_t lenpre = strlen(pre), lenstr = strlen(str);
+  size_t lenpre = std::strlen(pre), lenstr = std::strlen(str);
  return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
 }

@ -313,7 +313,7 @@ bool validate(const char *dirname) {
  size_t total_strings = 0;
  probable_bug = false;
  const char *extension = ".json";
-  size_t dirlen = strlen(dirname);
+  size_t dirlen = std::strlen(dirname);
  struct dirent **entry_list;
  int c = scandir(dirname, &entry_list, 0, alphasort);
  if (c < 0) {
@ -328,7 +328,7 @@ bool validate(const char *dirname) {
  for (int i = 0; i < c; i++) {
    const char *name = entry_list[i]->d_name;
    if (has_extension(name, extension)) {
-      size_t filelen = strlen(name);
+      size_t filelen = std::strlen(name);
      fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
      strcpy(fullpath, dirname);
      if (needsep) {
--- a/tests/unicode_tests.cpp
+++ b/tests/unicode_tests.cpp
@ -224,14 +224,14 @@ void test() {
      "\x91\x85\x95\x9e",
      "\x6c\x02\x8e\x18"};
  for (size_t i = 0; i < 8; i++) {
-    size_t len = strlen(goodsequences[i]);
+    size_t len = std::strlen(goodsequences[i]);
    if (!simdjson::validate_utf8(goodsequences[i], len)) {
      printf("bug goodsequences[%zu]\n", i);
      abort();
    }
  }
  for (size_t i = 0; i < 26; i++) {
-    size_t len = strlen(badsequences[i]);
+    size_t len = std::strlen(badsequences[i]);
    if (simdjson::validate_utf8(badsequences[i], len)) {
      printf("bug lookup2 badsequences[%zu]\n", i);
      abort();