Pass "capacity" straight to iterate, support std::string

This commit is contained in:
John Keiser 2021-03-02 22:15:54 -08:00
parent 3db1a214ce
commit f0e92e3bdd
13 changed files with 198 additions and 62 deletions

View File

@ -75,8 +75,9 @@ If you have a buffer of your own with enough padding already (SIMDJSON_PADDING e
```c++
ondemand::parser parser;
auto json = "[1] "; // An extra 32 bytes
ondemand::document doc = parser.iterate(promise_padded(json, 3)); // 3 JSON bytes
char json[3+SIMDJSON_PADDING];
strcpy(json, "[1]");
ondemand::document doc = parser.iterate(json, strlen(json), sizeof(json));
```
Documents Are Iterators

View File

@ -36,6 +36,7 @@ enum error_code {
UNEXPECTED_ERROR, ///< indicative of a bug in simdjson
PARSER_IN_USE, ///< parser is already in use.
OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order
INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it.
/** @private Number of error codes */
NUM_ERROR_CODES
};

View File

@ -24,6 +24,8 @@ simdjson_warn_unused simdjson_really_inline error_code parser::allocate(size_t n
}
simdjson_warn_unused simdjson_really_inline simdjson_result<document> parser::iterate(padded_string_view json) & noexcept {
if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; }
// Allocate if needed
if (capacity() < json.length() || !string_buf) {
SIMDJSON_TRY( allocate(json.length(), max_depth()) );
@ -34,6 +36,22 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<document> parser::it
return document::start({ reinterpret_cast<const uint8_t *>(json.data()), this });
}
// Raw char buffer overload: bundles (json, len, allocated) into a padded_string_view and
// forwards to the padded_string_view overload of iterate().
simdjson_warn_unused simdjson_really_inline simdjson_result<document> parser::iterate(const char *json, size_t len, size_t allocated) & noexcept {
  const padded_string_view padded_json(json, len, allocated);
  return iterate(padded_json);
}
// Raw byte buffer overload: bundles (json, len, allocated) into a padded_string_view and
// forwards to the padded_string_view overload of iterate().
simdjson_warn_unused simdjson_really_inline simdjson_result<document> parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept {
  const padded_string_view padded_json(json, len, allocated);
  return iterate(padded_json);
}
// string_view overload: `allocated` is the caller-supplied size of the buffer behind `json`.
// Both are bundled into a padded_string_view and forwarded to that overload of iterate().
simdjson_warn_unused simdjson_really_inline simdjson_result<document> parser::iterate(std::string_view json, size_t allocated) & noexcept {
  const padded_string_view padded_json(json, allocated);
  return iterate(padded_json);
}
// std::string overload: wraps the string in a padded_string_view (built from the string alone,
// with no explicit capacity argument) and forwards to that overload of iterate().
simdjson_warn_unused simdjson_really_inline simdjson_result<document> parser::iterate(const std::string &json) & noexcept {
  const padded_string_view padded_json(json);
  return iterate(padded_json);
}
simdjson_warn_unused simdjson_really_inline simdjson_result<document> parser::iterate(const simdjson_result<padded_string_view> &result) & noexcept {
// We don't presently have a way to temporarily get a const T& from a simdjson_result<T> without throwing an exception
SIMDJSON_TRY( result.error() );
@ -49,6 +67,8 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<document> parser::it
}
simdjson_warn_unused simdjson_really_inline simdjson_result<json_iterator> parser::iterate_raw(padded_string_view json) & noexcept {
if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; }
// Allocate if needed
if (capacity() < json.length()) {
SIMDJSON_TRY( allocate(json.length(), max_depth()) );

View File

@ -53,8 +53,11 @@ public:
* those bytes are initialized to, as long as they are allocated.
*
* @param json The JSON to parse.
* @param len The length of the JSON.
* @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING).
*
* @return The document, or an error:
* - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes.
* - MEMALLOC if the parser does not have enough capacity, and memory
* allocation fails.
* - EMPTY if the document is all whitespace.
@ -63,9 +66,21 @@ public:
* - UNCLOSED_STRING if there is an unclosed string in the document.
*/
simdjson_warn_unused simdjson_result<document> iterate(padded_string_view json) & noexcept;
/** @overload simdjson_result<document> iterate(padded_string_view json) & noexcept */
simdjson_warn_unused simdjson_result<document> iterate(const char *json, size_t len, size_t capacity) & noexcept;
/** @overload simdjson_result<document> iterate(padded_string_view json) & noexcept */
simdjson_warn_unused simdjson_result<document> iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept;
/** @overload simdjson_result<document> iterate(padded_string_view json) & noexcept */
simdjson_warn_unused simdjson_result<document> iterate(std::string_view json, size_t capacity) & noexcept;
/** @overload simdjson_result<document> iterate(padded_string_view json) & noexcept */
simdjson_warn_unused simdjson_result<document> iterate(const std::string &json) & noexcept;
/** @overload simdjson_result<document> iterate(padded_string_view json) & noexcept */
simdjson_warn_unused simdjson_result<document> iterate(const simdjson_result<padded_string> &json) & noexcept;
/** @overload simdjson_result<document> iterate(padded_string_view json) & noexcept */
simdjson_warn_unused simdjson_result<document> iterate(const simdjson_result<padded_string_view> &json) & noexcept;
/** @overload simdjson_result<document> iterate(padded_string_view json) & noexcept */
simdjson_warn_unused simdjson_result<document> iterate(padded_string &&json) & noexcept = delete;
/**
* @private
*
@ -91,8 +106,11 @@ public:
* those bytes are initialized to, as long as they are allocated.
*
* @param json The JSON to parse.
* @param len The length of the JSON.
* @param allocated The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING).
*
* @return The iterator, or an error:
* - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes.
* - MEMALLOC if the parser does not have enough capacity, and memory
* allocation fails.
* - EMPTY if the document is all whitespace.
@ -102,7 +120,9 @@ public:
*/
simdjson_warn_unused simdjson_result<json_iterator> iterate_raw(padded_string_view json) & noexcept;
/** The capacity of this parser (the largest document it can process). */
simdjson_really_inline size_t capacity() const noexcept;
/** The maximum depth of this parser (the most deeply nested objects and arrays it can process). */
simdjson_really_inline size_t max_depth() const noexcept;
private:

View File

@ -111,7 +111,9 @@ inline char *padded_string::data() noexcept { return data_ptr; }
inline padded_string::operator std::string_view() const { return std::string_view(data(), length()); }
inline padded_string::operator padded_string_view() const noexcept { return padded_string_view(data(), length()); }
inline padded_string::operator padded_string_view() const noexcept {
return padded_string_view(data(), length(), length() + SIMDJSON_PADDING);
}
inline simdjson_result<padded_string> padded_string::load(const std::string &filename) noexcept {
// Open the file

View File

@ -11,7 +11,7 @@
namespace simdjson {
struct padded_string_view;
class padded_string_view;
/**
* String with extra allocation for ease of use with parser::parse()

View File

@ -11,28 +11,29 @@
namespace simdjson {
inline padded_string_view::padded_string_view(const char* s, size_t s_len) noexcept
: std::string_view(s, s_len)
inline padded_string_view::padded_string_view(const char* s, size_t len, size_t capacity) noexcept
: std::string_view(s, len), _capacity(capacity)
{
}
inline padded_string_view::padded_string_view(const uint8_t* s, size_t s_len) noexcept
: padded_string_view(reinterpret_cast<const char*>(s), s_len)
inline padded_string_view::padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept
: padded_string_view(reinterpret_cast<const char*>(s), len, capacity)
{
}
inline padded_string_view::padded_string_view(std::string_view s) noexcept
: std::string_view(s)
inline padded_string_view::padded_string_view(const std::string &s) noexcept
: std::string_view(s), _capacity(s.capacity())
{
}
inline padded_string_view promise_padded(const char* s, uint8_t s_len) noexcept {
return padded_string_view(s, s_len);
inline padded_string_view::padded_string_view(std::string_view s, size_t capacity) noexcept
: std::string_view(s), _capacity(capacity)
{
}
inline padded_string_view promise_padded(std::string_view s) noexcept {
return padded_string_view(s);
}
/** Returns the stored _capacity value (the number of allocated bytes given for this view). */
inline size_t padded_string_view::capacity() const noexcept { return _capacity; }
/**
 * The number of allocated bytes beyond length().
 *
 * Both operands are unsigned, so a plain subtraction would wrap around to a huge value when
 * capacity() is smaller than length(). Returning 0 in that case makes an inconsistent
 * (length, capacity) pair look like "no padding" rather than "plenty of padding".
 *
 * @return capacity() - length(), or 0 if capacity() is not larger than length().
 */
inline size_t padded_string_view::padding() const noexcept {
  return capacity() > length() ? capacity() - length() : 0;
}
} // namespace simdjson

View File

@ -15,7 +15,11 @@ namespace simdjson {
/**
* User-provided string that promises it has extra padded bytes at the end for use with parser::parse().
*/
struct padded_string_view : public std::string_view {
class padded_string_view : public std::string_view {
private:
// Number of allocated bytes backing the view. Zero-initialized so that a default-constructed
// padded_string_view reports a capacity of 0 instead of an indeterminate value.
size_t _capacity{0};
public:
/** Create an empty padded_string_view. */
inline padded_string_view() noexcept = default;
@ -23,35 +27,38 @@ struct padded_string_view : public std::string_view {
* Promise the given buffer has at least SIMDJSON_PADDING extra bytes allocated to it.
*
* @param s The string.
* @param s_len The length of the string (not including padding).
* @param len The length of the string (not including padding).
* @param capacity The allocated length of the string, including padding.
*/
explicit inline padded_string_view(const char* s, size_t s_len) noexcept;
/** overload explicit inline padded_string_view(const char* s, size_t s_len) noexcept */
explicit inline padded_string_view(const uint8_t* s, size_t s_len) noexcept;
explicit inline padded_string_view(const char* s, size_t len, size_t capacity) noexcept;
/** @overload explicit inline padded_string_view(const char* s, size_t len, size_t capacity) noexcept */
explicit inline padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept;
/**
* Promise the given string has at least SIMDJSON_PADDING extra bytes allocated to it.
*
* The capacity of the string will be used to determine its padding.
*
* @param s The string.
*/
explicit inline padded_string_view(const std::string &s) noexcept;
/**
* Promise the given string_view has at least SIMDJSON_PADDING extra bytes allocated to it.
*
* @param s The string.
* @param capacity The allocated length of the string, including padding.
*/
explicit inline padded_string_view(std::string_view s) noexcept;
explicit inline padded_string_view(std::string_view s, size_t capacity) noexcept;
/** The number of allocated bytes. */
inline size_t capacity() const noexcept;
/** The amount of padding on the string (capacity() - length()) */
inline size_t padding() const noexcept;
}; // padded_string_view
/**
* Promise the given buffer has at least SIMDJSON_PADDING extra bytes allocated to it.
*
* @param s The string.
* @param s_len The length of the string (not including padding).
*/
inline padded_string_view promise_padded(const char* s, uint8_t s_len) noexcept;
/**
* Promise the given string_view has at least SIMDJSON_PADDING extra bytes allocated to it.
*
* @param s The string.
*/
inline padded_string_view promise_padded(std::string_view s) noexcept;
#if SIMDJSON_EXCEPTIONS
/**
* Send padded_string instance to an output stream.

View File

@ -29,7 +29,8 @@ namespace internal {
{ INVALID_URI_FRAGMENT, "Invalid URI fragment syntax." },
{ UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" },
{ PARSER_IN_USE, "Cannot parse a new document while a document is still in use." },
{ OUT_OF_ORDER_ITERATION, "Objects and arrays can only be iterated when they are first encountered." }
{ OUT_OF_ORDER_ITERATION, "Objects and arrays can only be iterated when they are first encountered." },
{ INSUFFICIENT_PADDING, "simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length." }
}; // error_messages[]
} // namespace internal

View File

@ -8,12 +8,12 @@
# adds a compilation test. Two targets are created, one expected to
# succeed compilation and one that is expected to fail.
function(add_dual_compile_test TEST_NAME)
add_cpp_test(${TEST_NAME}_should_compile SOURCES ${TEST_NAME}.cpp COMPILE_ONLY LABELS no_mingw)
add_cpp_test(${TEST_NAME}_should_not_compile SOURCES ${TEST_NAME}.cpp COMPILE_ONLY WILL_FAIL LABELS acceptance no_mingw)
# Label is "ondemand" (matching the _should_not_compile target) so label-based test selection picks up both targets.
add_cpp_test(${TEST_NAME}_should_compile SOURCES ${TEST_NAME}.cpp COMPILE_ONLY LABELS ondemand no_mingw)
add_cpp_test(${TEST_NAME}_should_not_compile SOURCES ${TEST_NAME}.cpp COMPILE_ONLY WILL_FAIL LABELS ondemand acceptance no_mingw)
target_compile_definitions(${TEST_NAME}_should_not_compile PRIVATE COMPILATION_TEST_USE_FAILING_CODE=1)
endfunction(add_dual_compile_test)
add_dual_compile_test(iterate_string)
add_dual_compile_test(iterate_char_star)
add_dual_compile_test(iterate_string_view)
add_dual_compile_test(iterate_temporary_buffer)

View File

@ -6,8 +6,8 @@ using namespace simdjson;
int main() {
ondemand::parser parser;
#if COMPILATION_TEST_USE_FAILING_CODE
auto json = std::string("1");
auto doc = parser.iterate(json);
const char* json;
auto doc = parser.iterate(json, strlen(json));
#else
auto json = "1"_padded;
auto doc = parser.iterate(json);

View File

@ -18,35 +18,116 @@ namespace parse_api_tests {
return true;
}
// Exercises parser.iterate() with each non-padded_string input form: char*, uint8_t*,
// std::string_view and std::string. In every case the document is expected to be readable
// as a double.
// NOTE(review): the return value on success/failure comes from the TEST_START / ASSERT_* /
// TEST_SUCCEED macros, which are defined elsewhere - confirm their early-return behavior there.
bool parser_iterate_padded() {
TEST_START();
ondemand::parser parser;
const char json_str[] = "12\0 "; // 32 padding
// Sanity-check the fixture: 2 JSON bytes inside a 34-byte array.
ASSERT_EQUAL(sizeof(json_str), 34);
ASSERT_EQUAL(strlen(json_str), 2);
{
cout << "- char*" << endl;
// Length is the JSON text only; capacity is the full array size.
auto doc = parser.iterate(json_str, strlen(json_str), sizeof(json_str));
ASSERT_SUCCESS( doc.get_double() );
}
{
cout << "- uint8_t*" << endl;
// Same buffer, viewed through a uint8_t pointer.
const uint8_t* json = reinterpret_cast<const uint8_t*>(json_str);
auto doc = parser.iterate(json, strlen(json_str), sizeof(json_str));
ASSERT_SUCCESS( doc.get_double() );
}
{
cout << "- string_view" << endl;
// Constructing a string_view from a char array stops at the first NUL, so its length is strlen(json_str).
std::string_view json(json_str);
auto doc = parser.iterate(json, sizeof(json_str));
ASSERT_SUCCESS( doc.get_double() );
}
{
cout << "- string" << endl;
std::string json = "12";
// Reserve SIMDJSON_PADDING bytes beyond the text before handing the string to the parser.
json.reserve(json.length() + SIMDJSON_PADDING);
auto doc = parser.iterate(json);
ASSERT_SUCCESS( doc.get_double() );
}
TEST_SUCCEED();
}
bool parser_iterate_padded_string_view() {
TEST_START();
ondemand::parser parser;
const char json_str[] = "12\0 "; // 32 padding
ASSERT_EQUAL(sizeof(json_str), 34);
ASSERT_EQUAL(strlen(json_str), 2);
padded_string_view json("12 ", 2);
auto doc = parser.iterate(json);
ASSERT_SUCCESS( doc.get_double() );
{
cout << "- padded_string_view(string_view)" << endl;
padded_string_view json(std::string_view(json_str), sizeof(json_str));
auto doc = parser.iterate(json);
ASSERT_SUCCESS( doc.get_double() );
}
json = padded_string_view(std::string_view("12 ", 2));
doc = parser.iterate(json);
ASSERT_SUCCESS( doc.get_double() );
{
cout << "- padded_string_view(char*)" << endl;
auto doc = parser.iterate(padded_string_view(json_str, strlen(json_str), sizeof(json_str)));
ASSERT_SUCCESS( doc.get_double() );
}
doc = parser.iterate(padded_string_view("12 ", 2));
ASSERT_SUCCESS( doc.get_double() );
return true;
{
cout << "- padded_string_view(string)" << endl;
std::string json = "12";
json.reserve(json.length() + SIMDJSON_PADDING);
auto doc = parser.iterate(padded_string_view(json));
ASSERT_SUCCESS( doc.get_double() );
}
{
cout << "- padded_string_view(string_view(char*))" << endl;
padded_string_view json(json_str, sizeof(json_str));
auto doc = parser.iterate(json);
ASSERT_SUCCESS( doc.get_double() );
}
TEST_SUCCEED();
}
bool parser_iterate_promise_padded() {
bool parser_iterate_insufficient_padding() {
TEST_START();
ondemand::parser parser;
constexpr char json_str[] = "12\0 "; // 31 padding
ASSERT_EQUAL(sizeof(json_str), 33);
ASSERT_EQUAL(strlen(json_str), 2);
ASSERT_EQUAL(padded_string_view(json_str, strlen(json_str), sizeof(json_str)).padding(), 31);
ASSERT_EQUAL(SIMDJSON_PADDING, 32);
auto doc = parser.iterate(promise_padded("12 ", 2));
ASSERT_SUCCESS( doc.get_double() );
{
cout << "- char*, 31 padding" << endl;
ASSERT_ERROR( parser.iterate(json_str, strlen(json_str), sizeof(json_str)), INSUFFICIENT_PADDING );
cout << "- char*, 0 padding" << endl;
ASSERT_ERROR( parser.iterate(json_str, strlen(json_str), strlen(json_str)), INSUFFICIENT_PADDING );
}
std::string_view json("12 ", 2);
doc = parser.iterate(promise_padded(json));
ASSERT_SUCCESS( doc.get_double() );
{
std::string_view json(json_str);
cout << "- string_view, 31 padding" << endl;
ASSERT_ERROR( parser.iterate(json, sizeof(json_str)), INSUFFICIENT_PADDING );
cout << "- string_view, 0 padding" << endl;
ASSERT_ERROR( parser.iterate(json, strlen(json_str)), INSUFFICIENT_PADDING );
}
return true;
{
std::string json = "12";
json.shrink_to_fit();
cout << "- string, 0 padding" << endl;
ASSERT_ERROR( parser.iterate(json), INSUFFICIENT_PADDING );
// It's actually kind of hard to allocate "just enough" capacity, since the string tends
// to grow more than you tell it to.
}
TEST_SUCCEED();
}
#if SIMDJSON_EXCEPTIONS
@ -55,14 +136,15 @@ namespace parse_api_tests {
ondemand::parser parser;
auto doc = parser.iterate(BASIC_JSON);
simdjson_unused ondemand::array array = doc;
return true;
TEST_SUCCEED();
}
#endif // SIMDJSON_EXCEPTIONS
bool run() {
return parser_iterate() &&
parser_iterate_padded() &&
parser_iterate_padded_string_view() &&
parser_iterate_promise_padded() &&
parser_iterate_insufficient_padding() &&
#if SIMDJSON_EXCEPTIONS
parser_iterate_exception() &&
#endif // SIMDJSON_EXCEPTIONS

View File

@ -35,8 +35,9 @@ bool basics_3() {
TEST_START();
ondemand::parser parser;
auto json = "[1] "; // An extra 32 bytes
ondemand::document doc = parser.iterate(promise_padded(json, 3)); // 3 JSON bytes
char json[3+SIMDJSON_PADDING];
strcpy(json, "[1]");
ondemand::document doc = parser.iterate(json, strlen(json), sizeof(json));
simdjson_unused auto unused_doc = doc.get_array();