diff --git a/doc/basics.md b/doc/basics.md index cd085fea..83f8deae 100644 --- a/doc/basics.md +++ b/doc/basics.md @@ -8,6 +8,7 @@ An overview of what you need to know to use simdjson, with examples. * [Using simdjson as a CMake dependency](#using-simdjson-as-a-cmake-dependency) * [The Basics: Loading and Parsing JSON Documents](#the-basics-loading-and-parsing-json-documents) * [Using the Parsed JSON](#using-the-parsed-json) +* [C++11 Support and string_view](#c++11-support-and-string_view) * [C++17 Support](#c++17-support) * [Minifying JSON strings without parsing](#minifying-json-strings-without-parsing) * [UTF-8 validation (alone)](#utf-8-validation-alone) @@ -192,6 +193,27 @@ And another one: cout << "number: " << v << endl; ``` + +C++11 Support and string_view +------------- + +The simdjson library builds on compilers supporting the [C++11 standard](https://en.wikipedia.org/wiki/C%2B%2B11). It is also a strict requirement: we have no plan to support older C++ compilers. + +We represent parsed strings in simdjson using the `std::string_view` class. It avoids +the need to copy the data, as would be necessary with the `std::string` class. It also +avoids the pitfalls of null-terminated C strings. + +The `std::string_view` class has become standard as part of C++17 but it is not always available +on compilers which only support C++11. When we detect that `string_view` is natively +available, we define the macro `SIMDJSON_HAS_STRING_VIEW`. + +When we detect that it is unavailable, +we use [string-view-lite](https://github.com/martinmoene/string-view-lite) as a +substitute. In such cases, we use the type alias `using string_view = nonstd::string_view;` to +offer the same API, irrespective of the compiler and standard library. The macro +`SIMDJSON_HAS_STRING_VIEW` will be *undefined* to indicate that we emulate `string_view`. 
+ + C++17 Support ------------- diff --git a/include/simdjson/dom/element.h b/include/simdjson/dom/element.h index b03184d1..1da0ffb6 100644 --- a/include/simdjson/dom/element.h +++ b/include/simdjson/dom/element.h @@ -62,21 +62,42 @@ public: */ inline simdjson_result get_object() const noexcept; /** - * Cast this element to a string. + * Cast this element to a null-terminated C string. + * + * The string is guaranteed to be valid UTF-8. * - * Equivalent to get(). + * The get_c_str() function is equivalent to get(). + * + * The length of the string is given by get_string_length(). Because JSON strings + * may contain null characters, it may be incorrect to use strlen to determine the + * string length. * - * @returns An pointer to a null-terminated string. This string is stored in the parser and will + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A pointer to a null-terminated UTF-8 string. This string is stored in the parser and will * be invalidated the next time it parses a document or when it is destroyed. * Returns INCORRECT_TYPE if the JSON element is not a string. */ inline simdjson_result get_c_str() const noexcept; /** - * Cast this element to a string. + * Gives the length in bytes of the string. + * + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A string length in bytes. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result get_string_length() const noexcept; + /** + * Cast this element to a string. + * + * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). * - * @returns A string. The string is stored in the parser and will be invalidated the next time it + * @returns A UTF-8 string. 
The string is stored in the parser and will be invalidated the next time it * parses a document or when it is destroyed. * Returns INCORRECT_TYPE if the JSON element is not a string. */ @@ -253,7 +274,9 @@ public: inline operator bool() const noexcept(false); /** - * Read this element as a null-terminated string. + * Read this element as a null-terminated UTF-8 string. + * + * Be mindful that JSON allows strings to contain null characters. * * Does *not* convert other types to a string; requires that the JSON type of the element was * an actual string. @@ -264,7 +287,7 @@ public: inline explicit operator const char*() const noexcept(false); /** - * Read this element as a null-terminated string. + * Read this element as a null-terminated UTF-8 string. * * Does *not* convert other types to a string; requires that the JSON type of the element was * an actual string. @@ -464,6 +487,7 @@ public: really_inline simdjson_result get_array() const noexcept; really_inline simdjson_result get_object() const noexcept; really_inline simdjson_result get_c_str() const noexcept; + really_inline simdjson_result get_string_length() const noexcept; really_inline simdjson_result get_string() const noexcept; really_inline simdjson_result get_int64() const noexcept; really_inline simdjson_result get_uint64() const noexcept; diff --git a/include/simdjson/inline/element.h b/include/simdjson/inline/element.h index 9f7e52b0..20ea1497 100644 --- a/include/simdjson/inline/element.h +++ b/include/simdjson/inline/element.h @@ -50,6 +50,10 @@ really_inline simdjson_result simdjson_result::get_c if (error()) { return error(); } return first.get_c_str(); } +really_inline simdjson_result simdjson_result::get_string_length() const noexcept { + if (error()) { return error(); } + return first.get_string_length(); +} really_inline simdjson_result simdjson_result::get_string() const noexcept { if (error()) { return error(); } return first.get_string(); @@ -190,6 +194,15 @@ inline simdjson_result 
element::get_c_str() const noexcept { return INCORRECT_TYPE; } } +inline simdjson_result element::get_string_length() const noexcept { + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: { + return tape.get_string_length(); + } + default: + return INCORRECT_TYPE; + } +} inline simdjson_result element::get_string() const noexcept { switch (tape.tape_ref_type()) { case internal::tape_type::STRING: