add ndjson fuzzer (#1304)

* add ndjson fuzzer

* reproduce #1310 in the newly added unit test

Had to replace the input, because:
1)
the fuzzer uses the first part of the input to determine
the batch_size to use, so that has to be cut off

2)
the master now protects against low values of batch_size

I also made the test not return early, so the error is triggered.
This commit is contained in:
Paul Dreik 2020-12-01 21:58:41 +01:00 committed by GitHub
parent 59c857e969
commit 725ca010e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 41 additions and 2 deletions

View File

@ -17,7 +17,7 @@ jobs:
# fuzzers that change behaviour with SIMDJSON_FORCE_IMPLEMENTATION
defaultimplfuzzers: atpointer dump dump_raw_tape element minify parser print_json
# fuzzers that loop over the implementations themselves, or don't need to switch.
implfuzzers: implementations minifyimpl ondemand padded utf8
implfuzzers: implementations minifyimpl ndjson ondemand padded utf8
implementations: haswell westmere fallback
UBSAN_OPTIONS: halt_on_error=1
MAXLEN: -max_len=4000

View File

@ -57,6 +57,7 @@ if(ENABLE_FUZZING)
implement_fuzzer(fuzz_implementations) # parses and serializes again, compares across implementations
implement_fuzzer(fuzz_minify) # minify *with* parsing
implement_fuzzer(fuzz_minifyimpl) # minify *without* parsing, plus compare implementations
implement_fuzzer(fuzz_ndjson) # the ndjson api
implement_fuzzer(fuzz_ondemand)
implement_fuzzer(fuzz_padded)
implement_fuzzer(fuzz_parser)

View File

@ -105,6 +105,14 @@ struct FuzzData {
return {};
}
// Returns a view over every byte that has not been handed out yet and
// marks the buffer as exhausted: Data is advanced past the returned bytes
// and Size is reset to zero.
std::string_view remainder_as_stringview() {
  const auto first = chardata();
  const auto length = Size;
  // Advance the cursor before returning; the view still points at the old position.
  Data += length;
  Size = 0;
  return std::string_view{first, length};
}
// split the remainder of the data into string views,
std::vector<std::string_view> splitIntoStrings() {
std::vector<std::string_view> ret;

31
fuzz/fuzz_ndjson.cpp Normal file
View File

@ -0,0 +1,31 @@
#include "simdjson.h"
#include <cstddef>
#include <cstdint>
#include <string>
#include "FuzzUtils.h"
#include "NullBuffer.h"
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
FuzzData fd(Data, Size);
const auto batch_size=static_cast<size_t>(fd.getInt<0,1000>());
const auto json=simdjson::padded_string{fd.remainder_as_stringview()};
simdjson::dom::parser parser;
#if SIMDJSON_EXCEPTIONS
try {
#endif
simdjson::dom::document_stream docs;
if(parser.parse_many(json,batch_size).get(docs)) {
return 0;
}
size_t bool_count=0;
for (auto doc : docs) {
bool_count+=doc.is_bool();
}
#if SIMDJSON_EXCEPTIONS
} catch(...) {
}
#endif
return 0;
}

View File

@ -30,7 +30,6 @@ cmake .. \
-DENABLE_FUZZING=On \
-DSIMDJSON_COMPETITION=Off \
-DSIMDJSON_FUZZ_LINKMAIN=Off \
-DSIMDJSON_GIT=Off \
-DSIMDJSON_GOOGLE_BENCHMARKS=Off \
-DSIMDJSON_DISABLE_DEPRECATED_API=On \
-DSIMDJSON_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE