From 0b39e3a6cfe99c878d99d0a377812dea68287c2a Mon Sep 17 00:00:00 2001 From: Paul Dreik Date: Thu, 19 Nov 2020 16:51:56 +0100 Subject: [PATCH] add fuzzer for padded_string (#1312) This also fixes an overflow problem. --- .github/workflows/fuzzers.yml | 4 +-- fuzz/CMakeLists.txt | 1 + fuzz/build_fuzzer_variants.sh | 9 ++--- fuzz/build_like_ossfuzz.sh | 2 +- fuzz/fuzz_padded.cpp | 54 ++++++++++++++++++++++++++++ include/simdjson/padded_string-inl.h | 13 ++++++- 6 files changed, 75 insertions(+), 8 deletions(-) create mode 100644 fuzz/fuzz_padded.cpp diff --git a/.github/workflows/fuzzers.yml b/.github/workflows/fuzzers.yml index e4b8c193..1d74c52c 100644 --- a/.github/workflows/fuzzers.yml +++ b/.github/workflows/fuzzers.yml @@ -15,9 +15,9 @@ jobs: runs-on: ubuntu-latest env: # fuzzers that change behaviour with SIMDJSON_FORCE_IMPLEMENTATION - defaultimplfuzzers: atpointer dump dump_raw_tape element minify parser print_json + defaultimplfuzzers: atpointer dump dump_raw_tape element minify parser print_json # fuzzers that loop over the implementations themselves, or don't need to switch. - implfuzzers: implementations minifyimpl ondemand utf8 + implfuzzers: implementations minifyimpl ondemand padded utf8 implementations: haswell westmere fallback UBSAN_OPTIONS: halt_on_error=1 MAXLEN: -max_len=4000 diff --git a/fuzz/CMakeLists.txt b/fuzz/CMakeLists.txt index b421c64d..fc6a390d 100644 --- a/fuzz/CMakeLists.txt +++ b/fuzz/CMakeLists.txt @@ -58,6 +58,7 @@ if(ENABLE_FUZZING) implement_fuzzer(fuzz_minify) # minify *with* parsing implement_fuzzer(fuzz_minifyimpl) # minify *without* parsing, plus compare implementations implement_fuzzer(fuzz_ondemand) + implement_fuzzer(fuzz_padded) implement_fuzzer(fuzz_parser) implement_fuzzer(fuzz_print_json) implement_fuzzer(fuzz_utf8) # utf8 verification, compares across implementations diff --git a/fuzz/build_fuzzer_variants.sh b/fuzz/build_fuzzer_variants.sh index a4289865..d9f5a5a6 100755 --- a/fuzz/build_fuzzer_variants.sh +++ b/fuzz/build_fuzzer_variants.sh @@ -24,7 +24,8 @@ fi set -u # common options -COMMON="-GNinja -DCMAKE_CXX_COMPILER=clang++$CLANGSUFFIX -DCMAKE_C_COMPILER=clang$CLANGSUFFIX -DSIMDJSON_BUILD_STATIC=Off -DENABLE_FUZZING=On -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_GIT=Off -DSIMDJSON_DISABLE_DEPRECATED_API=On" +CXX_CLAGS_COMMON=-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +COMMON="-GNinja -DCMAKE_CXX_COMPILER=clang++$CLANGSUFFIX -DCMAKE_C_COMPILER=clang$CLANGSUFFIX -DSIMDJSON_BUILD_STATIC=Off -DENABLE_FUZZING=On -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_DISABLE_DEPRECATED_API=On" # A replay build, as plain as it gets. For use with valgrind/gdb. variant=replay @@ -63,7 +64,7 @@ variant=sanitizers-O3 cd build-$variant cmake .. \ $COMMON \ - -DCMAKE_CXX_FLAGS="-O3 -fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined" \ + -DCMAKE_CXX_FLAGS="-O3 -fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined $CXX_CLAGS_COMMON" \ -DCMAKE_C_FLAGS="-O3 -fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined" \ -DCMAKE_BUILD_TYPE=Debug \ -DSIMDJSON_FUZZ_LINKMAIN=Off \ @@ -81,7 +82,7 @@ variant=sanitizers-O0 cd build-$variant cmake .. \ $COMMON \ - -DCMAKE_CXX_FLAGS="-O0 -fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined" \ + -DCMAKE_CXX_FLAGS="-O0 -fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined $CXX_CLAGS_COMMON" \ -DCMAKE_C_FLAGS="-O0 -fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined" \ -DCMAKE_BUILD_TYPE=Debug \ -DSIMDJSON_FUZZ_LINKMAIN=Off \ @@ -101,7 +102,7 @@ variant=fast cmake .. \ $COMMON \ - -DCMAKE_CXX_FLAGS="-fsanitize=fuzzer-no-link" \ + -DCMAKE_CXX_FLAGS="-fsanitize=fuzzer-no-link $CXX_CLAGS_COMMON" \ -DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link" \ -DCMAKE_BUILD_TYPE=Release \ -DSIMDJSON_FUZZ_LINKMAIN=Off \ diff --git a/fuzz/build_like_ossfuzz.sh b/fuzz/build_like_ossfuzz.sh index 6006ba24..2b727dc8 100755 --- a/fuzz/build_like_ossfuzz.sh +++ b/fuzz/build_like_ossfuzz.sh @@ -14,7 +14,7 @@ export OUT=$(pwd)/ossfuzz-out export CC=clang export CXX="clang++" export CFLAGS="-fsanitize=fuzzer-no-link" -export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -O1" +export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -O1 -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION" export LIB_FUZZING_ENGINE="-fsanitize=fuzzer" $ossfuzz diff --git a/fuzz/fuzz_padded.cpp b/fuzz/fuzz_padded.cpp new file mode 100644 index 00000000..642d33f5 --- /dev/null +++ b/fuzz/fuzz_padded.cpp @@ -0,0 +1,54 @@ +#include "FuzzUtils.h" +#include "simdjson.h" +#include +#include +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + FuzzData fd(Data, Size); + + const int action = fd.getInt<0, 10>(); + + const auto s1 = fd.get(); + const auto s2 = fd.get(); + switch (action) { + case 0: { + simdjson_unused simdjson::padded_string p(s1); + } break; + case 1: { + // operator== with temp value + simdjson_unused simdjson::padded_string p1(s1); + simdjson_unused simdjson::padded_string p2(s2); + p1 = std::move(p2); + } break; + case 2: { + // swap + simdjson_unused simdjson::padded_string p1(s1); + simdjson_unused simdjson::padded_string p2(s2); + p1.swap(p2); + } break; + case 3: { + simdjson_unused simdjson::padded_string p(s1); + p.data(); + } break; + case 4: { + simdjson::padded_string p(s1); + simdjson_unused auto sv = static_cast(p); + } break; + case 5: { + // load from file. + const std::string filename = "/dev/shm/fuzz_padded.tmp"; + { + std::ofstream file(filename); + assert(file); + const long ssize = static_cast(fd.Size); + file.write(fd.chardata(), ssize); + assert(file.tellp() == ssize); + } + simdjson_unused auto data = simdjson::padded_string::load(filename); + } break; + default:; + } + return 0; +} diff --git a/include/simdjson/padded_string-inl.h b/include/simdjson/padded_string-inl.h index 7f40995f..215c46e6 100644 --- a/include/simdjson/padded_string-inl.h +++ b/include/simdjson/padded_string-inl.h @@ -19,7 +19,18 @@ namespace internal { // The length parameter is the maximum size in bytes of the string. // The caller is responsible to free the memory (e.g., delete[] (...)). inline char *allocate_padded_buffer(size_t length) noexcept { - size_t totalpaddedlength = length + SIMDJSON_PADDING; + const size_t totalpaddedlength = length + SIMDJSON_PADDING; + if(totalpaddedlength(1UL<<20)) { + return nullptr; + } +#endif + char *padded_buffer = new (std::nothrow) char[totalpaddedlength]; if (padded_buffer == nullptr) { return nullptr;