From f44386008ca1892377949d7ddb2cdfedc2a982f6 Mon Sep 17 00:00:00 2001 From: Paul Dreik Date: Sat, 26 Sep 2020 14:25:00 +0200 Subject: [PATCH] add minifier fuzzers (#1172) This adds a minifier fuzzer. There is also a utf-8 fuzzer, but it is disabled until #1187 is fixed. Run all fuzzers but the utf-8 one in the github CI fuzz. --- .github/workflows/fuzzers.yml | 6 ++-- fuzz/CMakeLists.txt | 19 ++++++----- fuzz/build_like_ossfuzz.sh | 3 +- fuzz/fuzz_minify.cpp | 4 +++ fuzz/fuzz_minifyimpl.cpp | 63 +++++++++++++++++++++++++++++++++++ fuzz/fuzz_utf8.cpp | 48 ++++++++++++++++++++++++++ fuzz/ossfuzz.sh | 10 +++--- 7 files changed, 137 insertions(+), 16 deletions(-) create mode 100644 fuzz/fuzz_minifyimpl.cpp create mode 100644 fuzz/fuzz_utf8.cpp diff --git a/.github/workflows/fuzzers.yml b/.github/workflows/fuzzers.yml index fbb022d4..fc29badc 100644 --- a/.github/workflows/fuzzers.yml +++ b/.github/workflows/fuzzers.yml @@ -12,7 +12,7 @@ jobs: build: runs-on: ubuntu-latest env: - allfuzzers: atpointer dump dump_raw_tape parser print_json implementations + allfuzzers: atpointer dump dump_raw_tape implementations minify minifyimpl parser print_json implementations: haswell westmere fallback UBSAN_OPTIONS: halt_on_error=1 MAXLEN: -max_len=4000 @@ -24,7 +24,7 @@ jobs: sudo apt-get install --quiet ninja-build valgrind zip unzip wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh - sudo ./llvm.sh 9 + sudo ./llvm.sh 10 - uses: actions/checkout@v1 @@ -92,7 +92,7 @@ jobs: done - name: Save the corpus as a github artifact - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v2 with: name: corpus path: corpus.tar diff --git a/fuzz/CMakeLists.txt b/fuzz/CMakeLists.txt index e7b3a1f0..c2fe00b4 100644 --- a/fuzz/CMakeLists.txt +++ b/fuzz/CMakeLists.txt @@ -28,10 +28,6 @@ if(ENABLE_FUZZING) # the fuzz targets, otherwise the cmake configuration step fails. 
set(SIMDJSON_FUZZ_LDFLAGS "" CACHE STRING "LDFLAGS for the fuzz targets") - add_custom_target(print_all_fuzz_targets - COMMAND ${CMAKE_COMMAND} -E echo ${SOURCES} - ) - # Fuzzer build flags and libraries add_library(simdjson-fuzzer INTERFACE) if (SIMDJSON_FUZZ_LINKMAIN) @@ -46,20 +42,27 @@ if(ENABLE_FUZZING) # Define the fuzzers add_custom_target(all_fuzzers) + set(fuzzernames) function(implement_fuzzer name) add_executable(${name} ${name}.cpp) target_link_libraries(${name} PRIVATE simdjson-fuzzer) add_dependencies(all_fuzzers ${name}) - add_test(${name} ${name}) - set_property(TEST ${name} APPEND PROPERTY LABELS fuzz) + set(fuzzernames ${fuzzernames} ${name} PARENT_SCOPE) endfunction() implement_fuzzer(fuzz_atpointer) implement_fuzzer(fuzz_dump) implement_fuzzer(fuzz_dump_raw_tape) - implement_fuzzer(fuzz_implementations) - implement_fuzzer(fuzz_minify) + implement_fuzzer(fuzz_implementations) # parses and serializes again, compares across implementations + implement_fuzzer(fuzz_minify) # minify *with* parsing + implement_fuzzer(fuzz_minifyimpl) # minify *without* parsing, plus compare implementations implement_fuzzer(fuzz_parser) implement_fuzzer(fuzz_print_json) + # wait for https://github.com/simdjson/simdjson/issues/1187 to be fixed before adding this back + #implement_fuzzer(fuzz_utf8) # utf8 verification, compares across implementations + + # to be able to get a list of all fuzzers from within a script + add_custom_target(print_all_fuzzernames + COMMAND ${CMAKE_COMMAND} -E echo ${fuzzernames}) endif() diff --git a/fuzz/build_like_ossfuzz.sh b/fuzz/build_like_ossfuzz.sh index 9b69161d..6006ba24 100755 --- a/fuzz/build_like_ossfuzz.sh +++ b/fuzz/build_like_ossfuzz.sh @@ -14,9 +14,10 @@ export OUT=$(pwd)/ossfuzz-out export CC=clang export CXX="clang++" export CFLAGS="-fsanitize=fuzzer-no-link" -export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -O3" +export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -O1" export 
LIB_FUZZING_ENGINE="-fsanitize=fuzzer" $ossfuzz echo "look at the results in $OUT" + diff --git a/fuzz/fuzz_minify.cpp b/fuzz/fuzz_minify.cpp index 80ff2584..872991a0 100644 --- a/fuzz/fuzz_minify.cpp +++ b/fuzz/fuzz_minify.cpp @@ -3,6 +3,10 @@ #include #include #include + +/* + * Minifies by first parsing, then minifying. + */ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { auto begin = as_chars(Data); diff --git a/fuzz/fuzz_minifyimpl.cpp b/fuzz/fuzz_minifyimpl.cpp new file mode 100644 index 00000000..827e960c --- /dev/null +++ b/fuzz/fuzz_minifyimpl.cpp @@ -0,0 +1,63 @@ +/* + * Minifies using the minify() function directly, without parsing. + * + * For fuzzing all of the implementations (haswell/fallback/westmere), + * finding any difference between the output of each which would + * indicate inconsistency. Also, it gets the non-default backend + * some fuzzing love. + * + * Copyright Paul Dreik 20200912 for the simdjson project. + */ + +#include "simdjson.h" +#include +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + + using Buffer=std::vector; + auto minify=[Data,Size](const simdjson::implementation* impl) -> Buffer { + Buffer ret(Size); + std::size_t retsize=0; + auto err=impl->minify(Data,Size,ret.data(),retsize); + if(err) { + std::string tmp = error_message(err); + ret.assign(tmp.begin(),tmp.end()); + } else { + assert(retsize<=Size && "size should not grow by minimize()!"); + ret.resize(retsize); + } + return ret; + }; + + + auto first=simdjson::available_implementations.begin(); + auto last=simdjson::available_implementations.end(); + + //make sure there is an implementation + assert(first!=last); + + const auto reference=minify(*first); + + bool failed=false; + for(auto it=first+1;it!=last; ++it) { + const auto current=minify(*it); + if(current!=reference) { + failed=true; + } + } + + if(failed) { + std::cerr<name()<<" returns "< +#include + +extern "C" int 
LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + + auto utf8verify=[Data,Size](const simdjson::implementation* impl) -> bool { + return impl->validate_utf8((const char*)Data,Size); + }; + + + auto first=simdjson::available_implementations.begin(); + auto last=simdjson::available_implementations.end(); + + //make sure there is an implementation + assert(first!=last); + + const bool reference=utf8verify(*first); + + bool failed=false; + for(auto it=first+1;it!=last; ++it) { + const bool current=utf8verify(*it); + if(current!=reference) { + failed=true; + } + } + + if(failed) { + std::cerr<name()<<" returns "</dev/null; then @@ -32,13 +30,17 @@ cmake .. \ -DENABLE_FUZZING=On \ -DSIMDJSON_COMPETITION=Off \ -DSIMDJSON_FUZZ_LINKMAIN=Off \ +-DSIMDJSON_GIT=Off \ +-DSIMDJSON_GOOGLE_BENCHMARKS=Off \ -DSIMDJSON_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE cmake --build . --target all_fuzzers cp fuzz/fuzz_* $OUT -# all corpora are equal, they all take json as input +# all fuzzers but one (the tiny target for utf8 validation) takes json +# as input, therefore use the same corpus of json files for all. for f in $(ls $OUT/fuzz* |grep -v '.zip$') ; do cp ../corpus.zip $OUT/$(basename $f).zip done +