Merge pull request #986 from simdjson/issue984

Fixing issue 984
This commit is contained in:
Daniel Lemire 2020-06-25 11:04:19 -04:00 committed by GitHub
commit 1e32897d3e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 66 additions and 7 deletions

View File

@ -8,6 +8,7 @@ An overview of what you need to know to use simdjson, with examples.
* [Using simdjson as a CMake dependency](#using-simdjson-as-a-cmake-dependency) * [Using simdjson as a CMake dependency](#using-simdjson-as-a-cmake-dependency)
* [The Basics: Loading and Parsing JSON Documents](#the-basics-loading-and-parsing-json-documents) * [The Basics: Loading and Parsing JSON Documents](#the-basics-loading-and-parsing-json-documents)
* [Using the Parsed JSON](#using-the-parsed-json) * [Using the Parsed JSON](#using-the-parsed-json)
* [C++11 Support and string_view](#c++11-support-and-string_view)
* [C++17 Support](#c++17-support) * [C++17 Support](#c++17-support)
* [Minifying JSON strings without parsing](#minifying-json-strings-without-parsing) * [Minifying JSON strings without parsing](#minifying-json-strings-without-parsing)
* [UTF-8 validation (alone)](#utf-8-validation-alone) * [UTF-8 validation (alone)](#utf-8-validation-alone)
@ -192,6 +193,27 @@ And another one:
cout << "number: " << v << endl; cout << "number: " << v << endl;
``` ```
C++11 Support and string_view
-------------
The simdjson library builds on compilers supporting the [C++11 standard](https://en.wikipedia.org/wiki/C%2B%2B11). It is also a strict requirement: we have no plan to support older C++ compilers.
We represent parsed strings in simdjson using the `std::string_view` class. It avoids
the need to copy the data, as would be necessary with the `std::string` class. It also
avoids the pitfalls of null-terminated C strings.
The `std::string_view` class has become standard as part of C++17 but it is not always available
on compilers which only support C++11. When we detect that `string_view` is natively
available, we define the macro `SIMDJSON_HAS_STRING_VIEW`.
When we detect that it is unavailable,
we use [string-view-lite](https://github.com/martinmoene/string-view-lite) as a
substitute. In such cases, we use the type alias `using string_view = nonstd::string_view;` to
offer the same API, irrespective of the compiler and standard library. The macro
`SIMDJSON_HAS_STRING_VIEW` will be *undefined* to indicate that we emulate `string_view`.
C++17 Support C++17 Support
------------- -------------

View File

@ -62,21 +62,42 @@ public:
*/ */
inline simdjson_result<object> get_object() const noexcept; inline simdjson_result<object> get_object() const noexcept;
/** /**
* Cast this element to a string. * Cast this element to a null-terminated C string.
*
* The string is guaranteed to be valid UTF-8.
* *
* Equivalent to get<const char *>(). * The get_c_str() function is equivalent to get<const char *>().
*
* The length of the string is given by get_string_length(). Because JSON strings
* may contain null characters, it may be incorrect to use strlen to determine the
* string length.
* *
* @returns An pointer to a null-terminated string. This string is stored in the parser and will * It is possible to get a single string_view instance which represents both the string
* content and its length: see get_string().
*
* @returns A pointer to a null-terminated UTF-8 string. This string is stored in the parser and will
* be invalidated the next time it parses a document or when it is destroyed. * be invalidated the next time it parses a document or when it is destroyed.
* Returns INCORRECT_TYPE if the JSON element is not a string. * Returns INCORRECT_TYPE if the JSON element is not a string.
*/ */
inline simdjson_result<const char *> get_c_str() const noexcept; inline simdjson_result<const char *> get_c_str() const noexcept;
/** /**
* Cast this element to a string. * Gives the length in bytes of the string.
*
* It is possible to get a single string_view instance which represents both the string
* content and its length: see get_string().
*
* @returns A string length in bytes.
* Returns INCORRECT_TYPE if the JSON element is not a string.
*/
inline simdjson_result<size_t> get_string_length() const noexcept;
/**
* Cast this element to a string.
*
* The string is guaranteed to be valid UTF-8.
* *
* Equivalent to get<std::string_view>(). * Equivalent to get<std::string_view>().
* *
* @returns A string. The string is stored in the parser and will be invalidated the next time it * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next time it
* parses a document or when it is destroyed. * parses a document or when it is destroyed.
* Returns INCORRECT_TYPE if the JSON element is not a string. * Returns INCORRECT_TYPE if the JSON element is not a string.
*/ */
@ -253,7 +274,9 @@ public:
inline operator bool() const noexcept(false); inline operator bool() const noexcept(false);
/** /**
* Read this element as a null-terminated string. * Read this element as a null-terminated UTF-8 string.
*
* Be mindful that JSON allows strings to contain null characters.
* *
* Does *not* convert other types to a string; requires that the JSON type of the element was * Does *not* convert other types to a string; requires that the JSON type of the element was
* an actual string. * an actual string.
@ -264,7 +287,7 @@ public:
inline explicit operator const char*() const noexcept(false); inline explicit operator const char*() const noexcept(false);
/** /**
* Read this element as a null-terminated string. * Read this element as a null-terminated UTF-8 string.
* *
* Does *not* convert other types to a string; requires that the JSON type of the element was * Does *not* convert other types to a string; requires that the JSON type of the element was
* an actual string. * an actual string.
@ -464,6 +487,7 @@ public:
really_inline simdjson_result<dom::array> get_array() const noexcept; really_inline simdjson_result<dom::array> get_array() const noexcept;
really_inline simdjson_result<dom::object> get_object() const noexcept; really_inline simdjson_result<dom::object> get_object() const noexcept;
really_inline simdjson_result<const char *> get_c_str() const noexcept; really_inline simdjson_result<const char *> get_c_str() const noexcept;
really_inline simdjson_result<size_t> get_string_length() const noexcept;
really_inline simdjson_result<std::string_view> get_string() const noexcept; really_inline simdjson_result<std::string_view> get_string() const noexcept;
really_inline simdjson_result<int64_t> get_int64() const noexcept; really_inline simdjson_result<int64_t> get_int64() const noexcept;
really_inline simdjson_result<uint64_t> get_uint64() const noexcept; really_inline simdjson_result<uint64_t> get_uint64() const noexcept;

View File

@ -50,6 +50,10 @@ really_inline simdjson_result<const char *> simdjson_result<dom::element>::get_c
if (error()) { return error(); } if (error()) { return error(); }
return first.get_c_str(); return first.get_c_str();
} }
// String-length accessor on the result wrapper: forwards to the wrapped
// element's get_string_length() unless this result already carries an error.
really_inline simdjson_result<size_t> simdjson_result<dom::element>::get_string_length() const noexcept {
// A stored error is returned as-is; `first` is not consulted in that case.
if (error()) { return error(); }
return first.get_string_length();
}
really_inline simdjson_result<std::string_view> simdjson_result<dom::element>::get_string() const noexcept { really_inline simdjson_result<std::string_view> simdjson_result<dom::element>::get_string() const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.get_string(); return first.get_string();
@ -190,6 +194,15 @@ inline simdjson_result<const char *> element::get_c_str() const noexcept {
return INCORRECT_TYPE; return INCORRECT_TYPE;
} }
} }
/**
 * Reports the string length recorded on the tape for this element.
 *
 * @returns The value of tape.get_string_length() when the tape entry is a
 *          STRING; INCORRECT_TYPE for every other tape type.
 */
inline simdjson_result<size_t> element::get_string_length() const noexcept {
  // Guard clause: only STRING tape entries have a string length.
  const bool is_string = (tape.tape_ref_type() == internal::tape_type::STRING);
  if (!is_string) {
    return INCORRECT_TYPE;
  }
  return tape.get_string_length();
}
inline simdjson_result<std::string_view> element::get_string() const noexcept { inline simdjson_result<std::string_view> element::get_string() const noexcept {
switch (tape.tape_ref_type()) { switch (tape.tape_ref_type()) {
case internal::tape_type::STRING: case internal::tape_type::STRING: