diff --git a/src/parsedjson.cpp b/src/parsedjson.cpp index a9d6bea0..0c6a1bdb 100644 --- a/src/parsedjson.cpp +++ b/src/parsedjson.cpp @@ -46,8 +46,11 @@ bool ParsedJson::allocate_capacity(size_t len, size_t max_depth) { uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7; structural_indexes = new (std::nothrow) uint32_t[max_structures]; // a pathological input like "[[[[..." would generate len tape elements, so - // need a capacity of len + 1 - size_t local_tape_capacity = ROUNDUP_N(len + 1, 64); + // need a capacity of at least len + 1, but it is also possible to do + // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" + // where len + 1 tape elements are + // generated, see issue https://github.com/lemire/simdjson/issues/345 + size_t local_tape_capacity = ROUNDUP_N(len + 2, 64); // a document with only zero-length strings... could have len/3 string // and we would need len/3 * 5 bytes on the string buffer size_t local_string_capacity = ROUNDUP_N(5 * len / 3 + 32, 64); diff --git a/tests/basictests.cpp b/tests/basictests.cpp index cc842023..a22b256b 100644 --- a/tests/basictests.cpp +++ b/tests/basictests.cpp @@ -153,6 +153,18 @@ bool number_test_powers_of_ten() { return true; } + +// adversarial example that once triggered overruns, see https://github.com/lemire/simdjson/issues/345 +bool bad_example() { + std::string badjson = "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6"; + simdjson::ParsedJson pj = simdjson::build_parsed_json(badjson); + if(pj.is_valid()) { + printf("This json should not be valid %s.\n", badjson.c_str()); + return false; + } + return true; +} + // returns true if successful bool navigate_test() { std::string json = "{" @@ -279,6 +291,8 @@ bool skyprophet_test() { int main() { std::cout << "Running basic tests." << std::endl; + if(!bad_example()) + return EXIT_FAILURE; if(!number_test_powers_of_two()) return EXIT_FAILURE; if(!number_test_powers_of_ten())