Making input capacity more robust. (#1488)

This commit is contained in:
Daniel Lemire 2021-03-09 09:58:38 -05:00 committed by GitHub
parent 8e8fbc4cff
commit 8b8af6aee5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 25 additions and 3 deletions

View File

@ -46,11 +46,19 @@ inline simdjson_result<size_t> parser::read_file(const std::string &path) noexce
std::fclose(fp); std::fclose(fp);
return IO_ERROR; return IO_ERROR;
} }
#if defined(SIMDJSON_VISUAL_STUDIO) && !SIMDJSON_IS_32BITS
__int64 len = _ftelli64(fp);
if(len == -1L) {
std::fclose(fp);
return IO_ERROR;
}
#else
long len = std::ftell(fp); long len = std::ftell(fp);
if((len < 0) || (len == LONG_MAX)) { if((len < 0) || (len == LONG_MAX)) {
std::fclose(fp); std::fclose(fp);
return IO_ERROR; return IO_ERROR;
} }
#endif
// Make sure we have enough capacity to load the file // Make sure we have enough capacity to load the file
if (_loaded_bytes_capacity < size_t(len)) { if (_loaded_bytes_capacity < size_t(len)) {

View File

@ -114,6 +114,8 @@ public:
* @param path The path to load. * @param path The path to load.
* @return The document, or an error: * @return The document, or an error:
* - IO_ERROR if there was an error opening or reading the file. * - IO_ERROR if there was an error opening or reading the file.
* Be mindful that on some 32-bit systems,
* the file size might be limited to 2 GB.
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
* - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - CAPACITY if the parser does not have enough capacity and len > max_capacity.
* - other json errors if parsing fails. You should not rely on these errors to always be the same for the * - other json errors if parsing fails. You should not rely on these errors to always be the same for the

View File

@ -37,7 +37,6 @@ enum error_code {
PARSER_IN_USE, ///< parser is already in use. PARSER_IN_USE, ///< parser is already in use.
OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order
INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it.
/** @private Number of error codes */
NUM_ERROR_CODES NUM_ERROR_CODES
}; };

View File

@ -54,6 +54,7 @@ inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parse
// Leaving these here so they can be inlined if so desired // Leaving these here so they can be inlined if so desired
inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; }
// Stage 1 index output // Stage 1 index output
size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7;
structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] );

View File

@ -83,6 +83,9 @@ public:
/** /**
* Change the capacity of this parser. * Change the capacity of this parser.
* *
* The capacity can never exceed SIMDJSON_MAXSIZE_BYTES (e.g., 4 GB)
* and a CAPACITY error is returned if it is attempted.
*
* Generally used for reallocation. * Generally used for reallocation.
* *
* @param capacity The new capacity. * @param capacity The new capacity.

View File

@ -131,11 +131,19 @@ inline simdjson_result<padded_string> padded_string::load(const std::string &fil
std::fclose(fp); std::fclose(fp);
return IO_ERROR; return IO_ERROR;
} }
#if defined(SIMDJSON_VISUAL_STUDIO) && !SIMDJSON_IS_32BITS
__int64 llen = _ftelli64(fp);
if(llen == -1L) {
std::fclose(fp);
return IO_ERROR;
}
#else
long llen = std::ftell(fp); long llen = std::ftell(fp);
if((llen < 0) || (llen == LONG_MAX)) { if((llen < 0) || (llen == LONG_MAX)) {
std::fclose(fp); std::fclose(fp);
return IO_ERROR; return IO_ERROR;
} }
#endif
// Allocate the padded_string // Allocate the padded_string
size_t len = static_cast<size_t>(llen); size_t len = static_cast<size_t>(llen);

View File

@ -106,6 +106,9 @@ struct padded_string final {
/** /**
* Load this padded string from a file. * Load this padded string from a file.
* *
* @return IO_ERROR on error. Be mindful that on some 32-bit systems,
* the file size might be limited to 2 GB.
*
* @param path the path to the file. * @param path the path to the file.
**/ **/
inline static simdjson_result<padded_string> load(const std::string &path) noexcept; inline static simdjson_result<padded_string> load(const std::string &path) noexcept;

View File

@ -22088,8 +22088,6 @@ public:
simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final;
inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final;
inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final;
private:
simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity);
}; };