From e5c9a310cf14ac8ca0533e4131c1dd091941dbdf Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 20 Oct 2021 12:18:04 -0400 Subject: [PATCH] Preparing release 1.0.1. --- CMakeLists.txt | 2 +- Doxyfile | 2 +- include/simdjson/simdjson_version.h | 4 +- singleheader/simdjson.cpp | 2 +- singleheader/simdjson.h | 122 ++++++++++++++++++++-------- 5 files changed, 92 insertions(+), 40 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 235222a8..bdc96698 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.14) project( simdjson # The version number is modified by tools/release.py - VERSION 1.0.0 + VERSION 1.0.1 DESCRIPTION "Parsing gigabytes of JSON per second" HOMEPAGE_URL "https://simdjson.org/" LANGUAGES CXX C diff --git a/Doxyfile b/Doxyfile index e4c0bdff..030143e9 100644 --- a/Doxyfile +++ b/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = simdjson # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "1.0.0" +PROJECT_NUMBER = "1.0.1" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/include/simdjson/simdjson_version.h b/include/simdjson/simdjson_version.h index bd1d6c55..afa39731 100644 --- a/include/simdjson/simdjson_version.h +++ b/include/simdjson/simdjson_version.h @@ -4,7 +4,7 @@ #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION 1.0.0 +#define SIMDJSON_VERSION 1.0.1 namespace simdjson { enum { @@ -19,7 +19,7 @@ enum { /** * The revision (major.minor.REVISION) of simdjson being used. */ - SIMDJSON_VERSION_REVISION = 0 + SIMDJSON_VERSION_REVISION = 1 }; } // namespace simdjson diff --git a/singleheader/simdjson.cpp b/singleheader/simdjson.cpp index 85cfb3f3..db1ccfcc 100644 --- a/singleheader/simdjson.cpp +++ b/singleheader/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2021-09-07 14:34:40 -0400. Do not edit! */ +/* auto-generated on 2021-10-20 12:15:35 -0400. Do not edit! */ /* begin file src/simdjson.cpp */ #include "simdjson.h" diff --git a/singleheader/simdjson.h b/singleheader/simdjson.h index df1f97b9..49363973 100644 --- a/singleheader/simdjson.h +++ b/singleheader/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on 2021-09-07 14:34:40 -0400. Do not edit! */ +/* auto-generated on 2021-10-20 12:15:35 -0400. Do not edit! */ /* begin file include/simdjson.h */ #ifndef SIMDJSON_H #define SIMDJSON_H @@ -2189,7 +2189,7 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION 1.0.0 +#define SIMDJSON_VERSION 1.0.1 namespace simdjson { enum { @@ -2204,7 +2204,7 @@ enum { /** * The revision (major.minor.REVISION) of simdjson being used. */ - SIMDJSON_VERSION_REVISION = 0 + SIMDJSON_VERSION_REVISION = 1 }; } // namespace simdjson @@ -8995,7 +8995,10 @@ simdjson_really_inline void mini_formatter::string(std::string_view unescaped) { size_t i = 0; // Fast path for the case where we have no control character, no ", and no backslash. // This should include most keys. - constexpr static bool needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // + // We would like to use 'bool' but some compilers take offense to bitwise operation + // with bool types. + constexpr static char needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, @@ -9007,6 +9010,8 @@ simdjson_really_inline void mini_formatter::string(std::string_view unescaped) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; for(;i + 8 <= unescaped.length(); i += 8) { // Poor's man vectorization. This could get much faster if we used SIMD. + // + // It is not the case that replacing '|' with '||' would be neutral performance-wise. if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])] | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] @@ -11264,7 +11269,7 @@ simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writ // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } @@ -11387,7 +11392,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } @@ -11437,7 +11442,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } @@ -11485,9 +11490,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_ // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } return i; @@ -13095,7 +13102,7 @@ simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writ // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } @@ -13218,7 +13225,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } @@ -13268,7 +13275,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } @@ -13316,9 +13323,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_ // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } return i; @@ -15411,7 +15420,7 @@ simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writ // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } @@ -15534,7 +15543,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } @@ -15584,7 +15593,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } @@ -15632,9 +15641,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_ // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } return i; @@ -17826,7 +17837,7 @@ simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writ // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } @@ -17949,7 +17960,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } @@ -17999,7 +18010,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } @@ -18047,9 +18058,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_ // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } return i; @@ -20099,7 +20112,7 @@ simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writ // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } @@ -20222,7 +20235,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } @@ -20272,7 +20285,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } @@ -20320,9 +20333,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_ // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // - The value we are looking at is less than or equal to INT64_MAX. // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } return i; @@ -22888,6 +22903,14 @@ public: * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -22911,6 +22934,13 @@ public: * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -22984,7 +23014,7 @@ public: * type. * * number.get_number_type() is number_type::signed_integer if we have - * a integer in [-9223372036854775808,9223372036854775808) + * an integer in [-9223372036854775808,9223372036854775808) * You can recover the value by calling number.get_int64() and you * have that number.is_int64() is true. * @@ -23126,6 +23156,7 @@ public: simdjson_really_inline document_reference() noexcept; simdjson_really_inline document_reference(document &d) noexcept; simdjson_really_inline document_reference(const document_reference &other) noexcept = default; + simdjson_really_inline document_reference& operator=(const document_reference &other) noexcept = default; simdjson_really_inline void rewind() noexcept; simdjson_really_inline simdjson_result get_array() & noexcept; simdjson_really_inline simdjson_result get_object() & noexcept; @@ -23684,7 +23715,7 @@ public: * type. * * number.get_number_type() is number_type::signed_integer if we have - * a integer in [-9223372036854775808,9223372036854775808) + * an integer in [-9223372036854775808,9223372036854775808) * You can recover the value by calling number.get_int64() and you * have that number.is_int64() is true. * @@ -24050,6 +24081,13 @@ public: * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -24076,6 +24114,13 @@ public: * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -24271,6 +24316,11 @@ public: * ondemand::parser parser; * document doc = parser.iterate(json); * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. + * * ### IMPORTANT: Validate what you use * * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to @@ -24383,13 +24433,15 @@ public: * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single - * buffer, separated by whitespace. It effectively parses until it has a fully valid document, + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * documents that consist of an object or array may omit the whitespace between them, concatenating - * with no separator. documents that consist of a single primitive (i.e. documents that are not - * arrays or objects) MUST be separated with whitespace. + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. * Setting batch_size to excessively large or excesively small values may impact negatively the @@ -27789,7 +27841,7 @@ simdjson_really_inline simdjson_result document_reference::find_field_uno simdjson_really_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } simdjson_really_inline simdjson_result document_reference::type() noexcept { return doc->type(); } simdjson_really_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } -simdjson_really_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); }; +simdjson_really_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } simdjson_really_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } simdjson_really_inline simdjson_result document_reference::is_integer() noexcept { return doc->is_integer(); } simdjson_really_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_number_type(); }