add multi implementation fuzzer (#1162)

This adds a fuzzer which parses the same input using all the available implementations (haswell, westmere, fallback on x64).

This should get the otherwise uncovered source files (mostly fallback) to show up in the fuzz coverage.
For instance, the fallback directory has only one line covered.
As of the 20200909 report, 1866 lines are covered out of 4478.

Also, it will detect if the implementations behave differently:

    by making sure they all succeed, or all error
    by turning the parsed data back into text, which should produce equal results

While at it, I corrected some minor things:

    clean up building too many variants, run with forced implementation (closes #815 )
    always store crashes as artefacts, good in case the fuzzer finds something
    return value of the fuzzer function should always be 0
    reduce log spam
    introduce max size for the seed corpus and the CI fuzzer
This commit is contained in:
Paul Dreik 2020-09-11 23:46:22 +02:00 committed by GitHub
parent 8cef02e8e8
commit 6ecbcc7c19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 181 additions and 172 deletions

View File

@ -14,8 +14,10 @@ jobs:
build: build:
runs-on: ubuntu-latest runs-on: ubuntu-latest
env: env:
allfuzzers: parser dump dump_raw_tape print_json allfuzzers: parser dump dump_raw_tape print_json implementations
artifactsprefix: -artifact_prefix=fuzzfailure/ implementations: haswell westmere fallback
UBSAN_OPTIONS: halt_on_error=1
MAXLEN: -max_len=4000
steps: steps:
- name: Install packages necessary for building - name: Install packages necessary for building
run: | run: |
@ -30,6 +32,8 @@ jobs:
run: | run: |
fuzz/build_corpus.sh fuzz/build_corpus.sh
mv corpus.zip seed_corpus.zip mv corpus.zip seed_corpus.zip
mkdir seedcorpus
unzip -q -d seedcorpus seed_corpus.zip
- name: Download the corpus from the last run - name: Download the corpus from the last run
run: | run: |
wget --quiet https://dl.bintray.com/pauldreik/simdjson-fuzz-corpus/corpus/corpus.tar wget --quiet https://dl.bintray.com/pauldreik/simdjson-fuzz-corpus/corpus/corpus.tar
@ -42,48 +46,28 @@ jobs:
clang++ --version clang++ --version
- name: Build all the variants - name: Build all the variants
run: fuzz/build_fuzzer_variants.sh run: fuzz/build_fuzzer_variants.sh
- name: Verify that the oss-fuzz seed corpus passes without problems - name: Run the fast fuzzer (release build, default implementation, to explore fast)
run: |
mkdir seedcorpus
unzip -q -d seedcorpus seed_corpus.zip
for buildvariant in noavx withavx; do
for fuzzer in $allfuzzers; do
build-ossfuzz-$buildvariant/fuzz/fuzz_$fuzzer seedcorpus -max_total_time=1
done
done
- name: Run the fastest fuzzer to explore fast
run: | run: |
set -eux
for fuzzer in $allfuzzers; do for fuzzer in $allfuzzers; do
mkdir -p out/$fuzzer # in case this is a new fuzzer, or corpus.tar is broken mkdir -p out/$fuzzer # in case this is a new fuzzer, or corpus.tar is broken
build-ossfuzz-fast9/fuzz/fuzz_$fuzzer out/$fuzzer -max_total_time=30 $artifactsprefix || touch failed build-fast/fuzz/fuzz_$fuzzer out/$fuzzer seedcorpus -max_total_time=30 $MAXLEN
# make sure the failing output is visible in the log
if [ -e failed ] ; then
ls fuzzfailure/* |xargs -n1 base64
exit 1
fi
done done
- name: Run the other fuzzer variants for $fuzzer, with sanitizers etc - name: Run the slow fuzzer (sanitizer+asserts, good at detecting errors)
run: | run: |
set -x set -eux
for fuzzer in $allfuzzers; do for fuzzer in $allfuzzers; do
build-ossfuzz-withavx/fuzz/fuzz_$fuzzer out/$fuzzer -max_total_time=20 $artifactsprefix || touch failed for implementation in $implementations; do
build-ossfuzz-noavx/fuzz/fuzz_$fuzzer out/$fuzzer -max_total_time=10 $artifactsprefix || touch failed export SIMDJSON_FORCE_IMPLEMENTATION=$implementation
build-ossfuzz-noavx9/fuzz/fuzz_$fuzzer out/$fuzzer -max_total_time=10 $artifactsprefix || touch failed build-sanitizers/fuzz/fuzz_$fuzzer out/$fuzzer seedcorpus -max_total_time=20 $MAXLEN
if [ -e failed ] ; then done
# make sure the failing output is visible in the log
ls fuzzfailure/* |xargs -n1 base64
exit 1
fi
echo disable msan runs, it fails inside the fuzzing engine and not the fuzzed code!
echo build-ossfuzz-msan-noavx9/fuzz/fuzz_$fuzzer out/$fuzzer -max_total_time=10 -reload=0 $artifactsprefix
echo build-ossfuzz-msan-withavx9/fuzz/fuzz_$fuzzer out/$fuzzer -max_total_time=10 -reload=0 $artifactsprefix
echo now have $(ls out/$fuzzer |wc -l) files in corpus echo now have $(ls out/$fuzzer |wc -l) files in corpus
done done
- name: Minimize the corpus with the fast fuzzer - name: Minimize the corpus with the fast fuzzer on the default implementation
run: | run: |
for fuzzer in $allfuzzers; do for fuzzer in $allfuzzers; do
mkdir -p out/cmin/$fuzzer mkdir -p out/cmin/$fuzzer
build-ossfuzz-fast9/fuzz/fuzz_$fuzzer -merge=1 out/cmin/$fuzzer out/$fuzzer build-fast/fuzz/fuzz_$fuzzer -merge=1 $MAXLEN out/cmin/$fuzzer out/$fuzzer seedcorpus
rm -rf out/$fuzzer rm -rf out/$fuzzer
mv out/cmin/$fuzzer out/$fuzzer mv out/cmin/$fuzzer out/$fuzzer
done done
@ -97,15 +81,10 @@ jobs:
with: with:
name: corpus name: corpus
path: corpus.tar path: corpus.tar
- name: Run the corpus through valgrind (normal build) - name: Run the minimized corpus through valgrind (replay build, default implementation)
run: | run: |
for fuzzer in $allfuzzers; do for fuzzer in $allfuzzers; do
find out/$fuzzer -type f |sort|xargs valgrind build-plain-noavx/fuzz/fuzz_$fuzzer 2>&1|tee valgrind-$fuzzer-noavx.txt find out/$fuzzer -type f |sort|xargs -n40 valgrind build-replay/fuzz/fuzz_$fuzzer 2>&1|tee valgrind-$fuzzer.txt
done
- name: Run the corpus through valgrind (noavx build)
run: |
for fuzzer in $allfuzzers; do
find out/$fuzzer -type f |sort|xargs valgrind build-plain-normal/fuzz/fuzz_$fuzzer 2>&1|tee valgrind-$fuzzer-normal.txt
done done
- name: Compress the valgrind output - name: Compress the valgrind output
run: tar cf valgrind.tar valgrind-*.txt run: tar cf valgrind.tar valgrind-*.txt
@ -125,3 +104,14 @@ jobs:
else else
echo "not on master, won't upload to bintray" echo "not on master, won't upload to bintray"
fi fi
- name: Archive any crashes as an artifact
uses: actions/upload-artifact@v2
if: always()
with:
name: crashes
path: |
crash-*
leak-*
timeout-*
if-no-files-found: ignore

View File

@ -54,10 +54,11 @@ if(ENABLE_FUZZING)
set_property(TEST ${name} APPEND PROPERTY LABELS fuzz) set_property(TEST ${name} APPEND PROPERTY LABELS fuzz)
endfunction() endfunction()
implement_fuzzer(fuzz_parser)
implement_fuzzer(fuzz_minify)
implement_fuzzer(fuzz_dump) implement_fuzzer(fuzz_dump)
implement_fuzzer(fuzz_print_json)
implement_fuzzer(fuzz_dump_raw_tape) implement_fuzzer(fuzz_dump_raw_tape)
implement_fuzzer(fuzz_implementations)
implement_fuzzer(fuzz_minify)
implement_fuzzer(fuzz_parser)
implement_fuzzer(fuzz_print_json)
endif() endif()

View File

@ -1,6 +1,6 @@
#!/bin/sh #!/bin/sh
# #
# Builds a corpus from all json files in the source directory. # Builds a corpus from all small json files in the source directory.
# The files are renamed to the sha1 of their content, and suffixed # The files are renamed to the sha1 of their content, and suffixed
# .json. The files are zipped into a flat file named corpus.zip # .json. The files are zipped into a flat file named corpus.zip
@ -10,9 +10,9 @@ tmp=$(mktemp -d)
root=$(readlink -f "$(dirname "$0")/..") root=$(readlink -f "$(dirname "$0")/..")
find $root -type f -name "*.json" | while read -r json; do find $root -type f -size -4k -name "*.json" | while read -r json; do
cp "$json" "$tmp"/$(sha1sum < "$json" |cut -f1 -d' ').json cp "$json" "$tmp"/$(sha1sum < "$json" |cut -f1 -d' ').json
done done
zip --junk-paths -r corpus.zip "$tmp" zip --quiet --junk-paths -r corpus.zip "$tmp"
rm -rf "$tmp" rm -rf "$tmp"

View File

@ -12,163 +12,62 @@ unset CXX CC CFLAGS CXXFLAGS LDFLAGS
me=$(basename $0) me=$(basename $0)
# A reproduce build, without avx but otherwise as plain # common options
# as it gets. No sanitizers or optimization. COMMON="-GNinja -DCMAKE_CXX_COMPILER=clang++-9 -DCMAKE_C_COMPILER=clang-9 -DSIMDJSON_BUILD_STATIC=On -DENABLE_FUZZING=On -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_GIT=Off"
variant=plain-noavx
# A replay build, as plain as it gets. For use with valgrind/gdb.
variant=replay
if [ ! -d build-$variant ] ; then if [ ! -d build-$variant ] ; then
mkdir build-$variant mkdir build-$variant
cd build-$variant cd build-$variant
cmake .. \ cmake .. \
-GNinja \ $COMMON \
-DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_BUILD_TYPE=Debug \
-DSIMDJSON_BUILD_STATIC=On \
-DENABLE_FUZZING=On \
-DSIMDJSON_COMPETITION=OFF \
-DSIMDJSON_GOOGLE_BENCHMARKS=OFF \
-DSIMDJSON_FUZZ_LINKMAIN=On \
-DSIMDJSON_IMPLEMENTATION_HASWELL=0
ninja all_fuzzers
cd ..
fi
# A reproduce build as plain as it gets. Everythings tunable is
# using the defaults.
variant=plain-normal
if [ ! -d build-$variant ] ; then
mkdir build-$variant
cd build-$variant
cmake .. \
-GNinja \
-DCMAKE_BUILD_TYPE=Debug \
-DSIMDJSON_BUILD_STATIC=On \
-DENABLE_FUZZING=On \
-DSIMDJSON_COMPETITION=OFF \
-DSIMDJSON_GOOGLE_BENCHMARKS=OFF \
-DSIMDJSON_FUZZ_LINKMAIN=On -DSIMDJSON_FUZZ_LINKMAIN=On
ninja all_fuzzers ninja all_fuzzers
cd .. cd ..
fi fi
# a fuzzer with sanitizers, built with avx disabled.
variant=ossfuzz-noavx
if [ ! -d build-$variant ] ; then
export CC=clang # A fuzzer with sanitizers. For improved capability to find bugs.
export CXX="clang++" variant=sanitizers
export CFLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined -mno-avx2 -mno-avx "
export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined -mno-avx2 -mno-avx" if [ ! -d build-$variant ] ; then
export LIB_FUZZING_ENGINE="-fsanitize=fuzzer"
mkdir build-$variant mkdir build-$variant
cd build-$variant cd build-$variant
cmake .. \ cmake .. \
-GNinja \ $COMMON \
-DCMAKE_CXX_FLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined" \
-DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined" \
-DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_BUILD_TYPE=Debug \
-DSIMDJSON_BUILD_STATIC=On \
-DENABLE_FUZZING=On \
-DSIMDJSON_COMPETITION=OFF \
-DSIMDJSON_GOOGLE_BENCHMARKS=OFF \
-DSIMDJSON_FUZZ_LINKMAIN=Off \ -DSIMDJSON_FUZZ_LINKMAIN=Off \
-DSIMDJSON_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE \ -DSIMDJSON_FUZZ_LDFLAGS="-fsanitize=fuzzer"
-DSIMDJSON_IMPLEMENTATION_HASWELL=0
ninja all_fuzzers ninja all_fuzzers
cd .. cd ..
fi fi
# a fuzzer with sanitizers, built with avx disabled.
variant=ossfuzz-noavx9
if which clang++-9 >/dev/null 2>&1 ; then
if [ ! -d build-$variant ] ; then
export CC=clang-9 # A fast fuzzer, for fast exploration rather than finding bugs.
export CXX="clang++-9" variant=fast
export CFLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined -mno-avx2 -mno-avx " if [ ! -d build-$variant ] ; then
export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined -mno-avx2 -mno-avx"
export LIB_FUZZING_ENGINE="-fsanitize=fuzzer"
mkdir build-$variant mkdir build-$variant
cd build-$variant cd build-$variant
cmake .. \ cmake .. \
-GNinja \ $COMMON \
-DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_CXX_FLAGS="-fsanitize=fuzzer-no-link" \
-DSIMDJSON_BUILD_STATIC=On \ -DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link" \
-DENABLE_FUZZING=On \ -DCMAKE_BUILD_TYPE=Release \
-DSIMDJSON_COMPETITION=OFF \
-DSIMDJSON_GOOGLE_BENCHMARKS=OFF \
-DSIMDJSON_FUZZ_LINKMAIN=Off \ -DSIMDJSON_FUZZ_LINKMAIN=Off \
-DSIMDJSON_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE \ -DSIMDJSON_FUZZ_LDFLAGS="-fsanitize=fuzzer"
-DSIMDJSON_IMPLEMENTATION_HASWELL=0
ninja all_fuzzers ninja all_fuzzers
cd .. cd ..
fi fi
else
echo "$me: WARNING clang++-9 not found, please install it to build $variant"
fi
# a fuzzer with sanitizers, default built
variant=ossfuzz-withavx
if [ ! -d build-$variant ] ; then
export CC=clang
export CXX="clang++"
export CFLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined"
export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined"
export LIB_FUZZING_ENGINE="-fsanitize=fuzzer"
mkdir build-$variant
cd build-$variant
cmake .. \
-GNinja \
-DCMAKE_BUILD_TYPE=Debug \
-DSIMDJSON_BUILD_STATIC=On \
-DENABLE_FUZZING=On \
-DSIMDJSON_COMPETITION=OFF \
-DSIMDJSON_GOOGLE_BENCHMARKS=OFF \
-DSIMDJSON_FUZZ_LINKMAIN=Off \
-DSIMDJSON_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE
ninja all_fuzzers
cd ..
fi
# a fast fuzzer, for fast exploration
variant=ossfuzz-fast9
if which clang++-9 >/dev/null 2>&1 ; then
if [ ! -d build-$variant ] ; then
export CC=clang-9
export CXX="clang++-9"
export CFLAGS="-fsanitize=fuzzer-no-link -O3 -g"
export CXXFLAGS="-fsanitize=fuzzer-no-link -O3 -g"
export LIB_FUZZING_ENGINE="-fsanitize=fuzzer"
mkdir build-$variant
cd build-$variant
cmake .. \
-GNinja \
-DCMAKE_BUILD_TYPE= \
-DSIMDJSON_BUILD_STATIC=On \
-DENABLE_FUZZING=On \
-DSIMDJSON_COMPETITION=OFF \
-DSIMDJSON_GOOGLE_BENCHMARKS=OFF \
-DSIMDJSON_FUZZ_LINKMAIN=Off \
-DSIMDJSON_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE
ninja all_fuzzers
cd ..
fi
else
echo "$me: WARNING clang++-9 not found, please install it to build $variant"
fi

View File

@ -14,7 +14,7 @@ export OUT=$(pwd)/ossfuzz-out
export CC=clang export CC=clang
export CXX="clang++" export CXX="clang++"
export CFLAGS="-fsanitize=fuzzer-no-link" export CFLAGS="-fsanitize=fuzzer-no-link"
export CXXFLAGS="-fsanitize=fuzzer-no-link" export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -O3"
export LIB_FUZZING_ENGINE="-fsanitize=fuzzer" export LIB_FUZZING_ENGINE="-fsanitize=fuzzer"
$ossfuzz $ossfuzz

View File

@ -52,7 +52,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
simdjson::dom::element elem; simdjson::dom::element elem;
auto error = parser.parse(Data, Size).get(elem); auto error = parser.parse(Data, Size).get(elem);
if (error) { return 1; } if (error) { return 0; }
NulOStream os; NulOStream os;
//std::ostream& os(std::cout); //std::ostream& os(std::cout);
print_json(os,elem); print_json(os,elem);

View File

@ -10,7 +10,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
simdjson::dom::parser parser; simdjson::dom::parser parser;
simdjson::dom::element elem; simdjson::dom::element elem;
auto error = parser.parse(Data, Size).get(elem); auto error = parser.parse(Data, Size).get(elem);
if (error) { return 1; } if (error) { return 0; }
NulOStream os; NulOStream os;
SIMDJSON_UNUSED auto dumpstatus = elem.dump_raw_tape(os); SIMDJSON_UNUSED auto dumpstatus = elem.dump_raw_tape(os);

View File

@ -0,0 +1,119 @@
/*
* For fuzzing all of the implementations (haswell/fallback/westmere),
* finding any difference between the output of each which would
* indicate inconsistency. Also, it gets the non-default backend
* some fuzzing love.
*
* Copyright Paul Dreik 20200909 for the simdjson project.
*/
#include "simdjson.h"
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <string>
#include <array>
// Keeps one implementation together with its intermediate results
// (parser, parsed element, error code and serialized output), which
// makes things easier to debug in case this fuzzer ever catches anything.
struct Impl {
  explicit Impl(const simdjson::implementation* im=nullptr)
    : impl(im),
      parser(),
      element(),
      error(),
      output() {}

  // copying is disabled; this silences -Weffc++
  Impl(const Impl&)=delete;
  Impl& operator=(const Impl&)=delete;

  // the implementation this entry belongs to (may be null until assigned)
  const simdjson::implementation* impl;
  // parser used together with this implementation
  simdjson::dom::parser parser;
  // root element produced by the parse
  simdjson::dom::element element;
  // error code reported by the parse
  simdjson::error_code error;
  // the parsed element turned back into text
  std::string output;
};
// Writes the name and error code of every entry in [first,last) to
// stderr, flushes the stream and then terminates through std::abort().
// Used when the implementations disagree on whether the input is valid.
template<class Iterator>
void showErrorAndAbort(Iterator first, Iterator last) {
  for(auto cur=first; cur!=last; ++cur) {
    std::cerr<<"Implementation: "<<cur->impl->name()<<"\tError:"<<cur->error<<'\n';
  }
  // flush explicitly so the report is visible before the process dies
  std::cerr.flush();
  std::abort();
}
// Writes the serialized output of every entry in [first,last) to stderr,
// followed by a match/mismatch verdict for each distinct pair of entries,
// then flushes the stream and terminates through std::abort().
template<class Iterator>
void showOutputAndAbort(Iterator first, Iterator last) {
  auto cur=first;
  while(cur!=last) {
    std::cerr<<"Implementation: "<<cur->impl->name()<<"\tOutput: "<<cur->output<<'\n';
    ++cur;
  }

  // show the pairwise results: each unordered pair is reported once,
  // an entry is never compared against itself
  for(auto lhs=first; lhs!=last; ++lhs) {
    auto rhs=lhs;
    for(++rhs; rhs!=last; ++rhs) {
      const char* verdict=(lhs->output==rhs->output) ? " match." : " do NOT match.";
      std::cerr<<"Implementation "<<lhs->impl->name()<<" and "<<rhs->impl->name()<<verdict<<'\n';
    }
  }

  // flush explicitly so the report is visible before the process dies
  std::cerr.flush();
  std::abort();
}
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
// make this dynamic, so it works regardless of how it was compiled
// or what hardware it runs on
constexpr std::size_t Nimplementations_max=3;
const std::size_t Nimplementations=simdjson::available_implementations.size();
if(Nimplementations>Nimplementations_max) {
//there is another backend added, please bump Nimplementations_max!
std::abort();
}
// get pointers to the backend implementation
std::array<Impl,Nimplementations_max> implementations;
{
std::size_t i=0;
for(auto& e: simdjson::available_implementations) {
implementations[i++].impl=e;
}
}
// let each implementation parse and store the result
std::size_t nerrors=0;
for(auto& e: implementations) {
simdjson::active_implementation=e.impl;
e.error=e.parser.parse(Data,Size).get(e.element);
if(e.error) {
++nerrors;
} else {
std::ostringstream oss;
oss<<e.element;
e.output=oss.str();
}
}
//we should either have no errors, or all should error
if(nerrors!=0) {
if(nerrors!=Nimplementations) {
showErrorAndAbort(implementations.begin(),
implementations.begin()+Nimplementations);
}
return 0;
}
//parsing went well for all. compare the output against the first.
const std::string& reference=implementations[0].output;
for(std::size_t i=1; i<Nimplementations; ++i) {
if(implementations[i].output!=reference) {
showOutputAndAbort(implementations.begin(),
implementations.begin()+Nimplementations);
}
}
//all is well
return 0;
}

View File

@ -11,7 +11,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
simdjson::dom::parser parser; simdjson::dom::parser parser;
simdjson::dom::element elem; simdjson::dom::element elem;
auto error = parser.parse(str).get(elem); auto error = parser.parse(str).get(elem);
if (error) { return 1; } if (error) { return 0; }
std::string minified=simdjson::minify(elem); std::string minified=simdjson::minify(elem);
(void)minified; (void)minified;