Merge pull request #1110 from simdjson/jkeiser/number-corruption

Fix potential buffer overrun with heavily customized input and padding
This commit is contained in:
John Keiser 2020-08-18 14:17:25 -07:00 committed by GitHub
commit 9356619380
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 69 additions and 10 deletions

View File

@ -79,6 +79,10 @@ struct structural_parser : structural_iterator {
return SUCCESS;
}
really_inline uint8_t last_structural() {
return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]];
}
really_inline void log_value(const char *type) {
logger::log_line(*this, "", type, "");
}
@ -114,18 +118,27 @@ WARN_UNUSED really_inline error_code structural_parser::parse(T &builder) noexce
//
{
const uint8_t *value = advance();
switch (*value) {
case '{': if (!empty_object(builder)) { goto object_begin; }; break;
case '[': {
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906
if (!STREAMING) {
if (buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]] != ']') {
// Make sure the outer hash or array is closed before continuing; otherwise, there are ways we
// could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906
if (!STREAMING) {
switch (*value) {
case '{':
if (last_structural() != '}') {
return TAPE_ERROR;
}
}
if (!empty_array(builder)) { goto array_begin; }; break;
break;
case '[':
if (last_structural() != ']') {
return TAPE_ERROR;
}
break;
}
}
switch (*value) {
case '{': if (!empty_object(builder)) { goto object_begin; }; break;
case '[': if (!empty_array(builder)) { goto array_begin; }; break;
default: SIMDJSON_TRY( builder.parse_root_primitive(*this, value) );
}
goto document_end;

View File

@ -145,6 +145,49 @@ namespace parser_load {
}
}
namespace adversarial {
#define PADDING_FILLED_WITH_NUMBERS "222222222222222222222222222222222"
bool number_overrun_at_root() {
TEST_START();
constexpr const char *json = "1" PADDING_FILLED_WITH_NUMBERS ",";
constexpr size_t len = 1; // strlen("1");
dom::parser parser;
uint64_t foo;
ASSERT_SUCCESS( parser.parse(json, len).get(foo) ); // Parse just the first digit
ASSERT_EQUAL( foo, 1 );
TEST_SUCCEED();
}
bool number_overrun_in_array() {
TEST_START();
constexpr const char *json = "[1" PADDING_FILLED_WITH_NUMBERS "]";
constexpr size_t len = 2; // strlen("[1");
dom::parser parser;
uint64_t foo;
ASSERT_ERROR( parser.parse(json, len).get(foo), TAPE_ERROR ); // Parse just the first digit
TEST_SUCCEED();
}
bool number_overrun_in_object() {
TEST_START();
constexpr const char *json = "{\"key\":1" PADDING_FILLED_WITH_NUMBERS "}";
constexpr size_t len = 8; // strlen("{\"key\":1");
dom::parser parser;
uint64_t foo;
ASSERT_ERROR( parser.parse(json, len).get(foo), TAPE_ERROR ); // Parse just the first digit
TEST_SUCCEED();
}
bool run() {
static_assert(33 > SIMDJSON_PADDING, "corruption test doesn't have enough padding"); // 33 = strlen(PADDING_FILLED_WITH_NUMBERS)
return true
&& number_overrun_at_root()
&& number_overrun_in_array()
&& number_overrun_in_object()
;
}
}
int main() {
// this is put here deliberately to check that the documentation is correct (README),
// should this fail to compile, you should update the documentation:
@ -152,7 +195,10 @@ int main() {
printf("unsupported CPU\n");
}
std::cout << "Running error tests." << std::endl;
if (!parser_load::run()) {
if (!(true
&& parser_load::run()
&& adversarial::run()
)) {
return EXIT_FAILURE;
}
std::cout << "Error tests are ok." << std::endl;