add multi implementation fuzzer (#1162)

This adds a fuzzer which parses the same input using all the available implementations (haswell, westmere, fallback on x64).

This should get the otherwise uncovered source files (mostly fallback) to show up in the fuzz coverage.
For instance, the fallback directory has only one line covered.
As of the 20200909 report, 1866 lines are covered out of 4478.

Also, it will detect if the implementations behave differently:

    by making sure they all succeed, or all error
    by checking that turning the parsed data into text again produces equal results

While at it, I corrected some minor things:

    clean up building too many variants, run with forced implementation (closes #815 )
    always store crashes as artefacts, good in case the fuzzer finds something
    return value of the fuzzer function should always be 0
    reduce log spam
    introduce max size for the seed corpus and the CI fuzzer
This commit is contained in:
Paul Dreik 2020-09-11 23:46:22 +02:00 committed by GitHub
parent 8cef02e8e8
commit 6ecbcc7c19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 181 additions and 172 deletions

View File

@ -14,8 +14,10 @@ jobs:
build:
runs-on: ubuntu-latest
env:
allfuzzers: parser dump dump_raw_tape print_json
artifactsprefix: -artifact_prefix=fuzzfailure/
allfuzzers: parser dump dump_raw_tape print_json implementations
implementations: haswell westmere fallback
UBSAN_OPTIONS: halt_on_error=1
MAXLEN: -max_len=4000
steps:
- name: Install packages necessary for building
run: |
@ -30,6 +32,8 @@ jobs:
run: |
fuzz/build_corpus.sh
mv corpus.zip seed_corpus.zip
mkdir seedcorpus
unzip -q -d seedcorpus seed_corpus.zip
- name: Download the corpus from the last run
run: |
wget --quiet https://dl.bintray.com/pauldreik/simdjson-fuzz-corpus/corpus/corpus.tar
@ -42,48 +46,28 @@ jobs:
clang++ --version
- name: Build all the variants
run: fuzz/build_fuzzer_variants.sh
- name: Verify that the oss-fuzz seed corpus passes without problems
run: |
mkdir seedcorpus
unzip -q -d seedcorpus seed_corpus.zip
for buildvariant in noavx withavx; do
for fuzzer in $allfuzzers; do
build-ossfuzz-$buildvariant/fuzz/fuzz_$fuzzer seedcorpus -max_total_time=1
done
done
- name: Run the fastest fuzzer to explore fast
- name: Run the fast fuzzer (release build, default implementation, to explore fast)
run: |
set -eux
for fuzzer in $allfuzzers; do
mkdir -p out/$fuzzer # in case this is a new fuzzer, or corpus.tar is broken
build-ossfuzz-fast9/fuzz/fuzz_$fuzzer out/$fuzzer -max_total_time=30 $artifactsprefix || touch failed
# make sure the failing output is visible in the log
if [ -e failed ] ; then
ls fuzzfailure/* |xargs -n1 base64
exit 1
fi
build-fast/fuzz/fuzz_$fuzzer out/$fuzzer seedcorpus -max_total_time=30 $MAXLEN
done
- name: Run the other fuzzer variants for $fuzzer, with sanitizers etc
- name: Run the slow fuzzer (sanitizer+asserts, good at detecting errors)
run: |
set -x
set -eux
for fuzzer in $allfuzzers; do
build-ossfuzz-withavx/fuzz/fuzz_$fuzzer out/$fuzzer -max_total_time=20 $artifactsprefix || touch failed
build-ossfuzz-noavx/fuzz/fuzz_$fuzzer out/$fuzzer -max_total_time=10 $artifactsprefix || touch failed
build-ossfuzz-noavx9/fuzz/fuzz_$fuzzer out/$fuzzer -max_total_time=10 $artifactsprefix || touch failed
if [ -e failed ] ; then
# make sure the failing output is visible in the log
ls fuzzfailure/* |xargs -n1 base64
exit 1
fi
echo disable msan runs, it fails inside the fuzzing engine and not the fuzzed code!
echo build-ossfuzz-msan-noavx9/fuzz/fuzz_$fuzzer out/$fuzzer -max_total_time=10 -reload=0 $artifactsprefix
echo build-ossfuzz-msan-withavx9/fuzz/fuzz_$fuzzer out/$fuzzer -max_total_time=10 -reload=0 $artifactsprefix
for implementation in $implementations; do
export SIMDJSON_FORCE_IMPLEMENTATION=$implementation
build-sanitizers/fuzz/fuzz_$fuzzer out/$fuzzer seedcorpus -max_total_time=20 $MAXLEN
done
echo now have $(ls out/$fuzzer |wc -l) files in corpus
done
- name: Minimize the corpus with the fast fuzzer
- name: Minimize the corpus with the fast fuzzer on the default implementation
run: |
for fuzzer in $allfuzzers; do
mkdir -p out/cmin/$fuzzer
build-ossfuzz-fast9/fuzz/fuzz_$fuzzer -merge=1 out/cmin/$fuzzer out/$fuzzer
build-fast/fuzz/fuzz_$fuzzer -merge=1 $MAXLEN out/cmin/$fuzzer out/$fuzzer seedcorpus
rm -rf out/$fuzzer
mv out/cmin/$fuzzer out/$fuzzer
done
@ -97,15 +81,10 @@ jobs:
with:
name: corpus
path: corpus.tar
- name: Run the corpus through valgrind (normal build)
- name: Run the minimized corpus through valgrind (replay build, default implementation)
run: |
for fuzzer in $allfuzzers; do
find out/$fuzzer -type f |sort|xargs valgrind build-plain-noavx/fuzz/fuzz_$fuzzer 2>&1|tee valgrind-$fuzzer-noavx.txt
done
- name: Run the corpus through valgrind (noavx build)
run: |
for fuzzer in $allfuzzers; do
find out/$fuzzer -type f |sort|xargs valgrind build-plain-normal/fuzz/fuzz_$fuzzer 2>&1|tee valgrind-$fuzzer-normal.txt
find out/$fuzzer -type f |sort|xargs -n40 valgrind build-replay/fuzz/fuzz_$fuzzer 2>&1|tee valgrind-$fuzzer.txt
done
- name: Compress the valgrind output
run: tar cf valgrind.tar valgrind-*.txt
@ -125,3 +104,14 @@ jobs:
else
echo "not on master, won't upload to bintray"
fi
- name: Archive any crashes as an artifact
uses: actions/upload-artifact@v2
if: always()
with:
name: crashes
path: |
crash-*
leak-*
timeout-*
if-no-files-found: ignore

View File

@ -54,10 +54,11 @@ if(ENABLE_FUZZING)
set_property(TEST ${name} APPEND PROPERTY LABELS fuzz)
endfunction()
implement_fuzzer(fuzz_parser)
implement_fuzzer(fuzz_minify)
implement_fuzzer(fuzz_dump)
implement_fuzzer(fuzz_print_json)
implement_fuzzer(fuzz_dump_raw_tape)
implement_fuzzer(fuzz_implementations)
implement_fuzzer(fuzz_minify)
implement_fuzzer(fuzz_parser)
implement_fuzzer(fuzz_print_json)
endif()

View File

@ -1,6 +1,6 @@
#!/bin/sh
#
# Builds a corpus from all json files in the source directory.
# Builds a corpus from all small json files in the source directory.
# The files are renamed to the sha1 of their content, and suffixed
# .json. The files are zipped into a flat file named corpus.zip
@ -10,9 +10,9 @@ tmp=$(mktemp -d)
root=$(readlink -f "$(dirname "$0")/..")
find $root -type f -name "*.json" | while read -r json; do
find $root -type f -size -4k -name "*.json" | while read -r json; do
cp "$json" "$tmp"/$(sha1sum < "$json" |cut -f1 -d' ').json
done
zip --junk-paths -r corpus.zip "$tmp"
zip --quiet --junk-paths -r corpus.zip "$tmp"
rm -rf "$tmp"

View File

@ -12,163 +12,62 @@ unset CXX CC CFLAGS CXXFLAGS LDFLAGS
me=$(basename $0)
# A reproduce build, without avx but otherwise as plain
# as it gets. No sanitizers or optimization.
variant=plain-noavx
# common options
COMMON="-GNinja -DCMAKE_CXX_COMPILER=clang++-9 -DCMAKE_C_COMPILER=clang-9 -DSIMDJSON_BUILD_STATIC=On -DENABLE_FUZZING=On -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_GIT=Off"
# A replay build, as plain as it gets. For use with valgrind/gdb.
variant=replay
if [ ! -d build-$variant ] ; then
mkdir build-$variant
cd build-$variant
cmake .. \
-GNinja \
$COMMON \
-DCMAKE_BUILD_TYPE=Debug \
-DSIMDJSON_BUILD_STATIC=On \
-DENABLE_FUZZING=On \
-DSIMDJSON_COMPETITION=OFF \
-DSIMDJSON_GOOGLE_BENCHMARKS=OFF \
-DSIMDJSON_FUZZ_LINKMAIN=On \
-DSIMDJSON_IMPLEMENTATION_HASWELL=0
ninja all_fuzzers
cd ..
fi
# A reproduce build as plain as it gets. Everythings tunable is
# using the defaults.
variant=plain-normal
if [ ! -d build-$variant ] ; then
mkdir build-$variant
cd build-$variant
cmake .. \
-GNinja \
-DCMAKE_BUILD_TYPE=Debug \
-DSIMDJSON_BUILD_STATIC=On \
-DENABLE_FUZZING=On \
-DSIMDJSON_COMPETITION=OFF \
-DSIMDJSON_GOOGLE_BENCHMARKS=OFF \
-DSIMDJSON_FUZZ_LINKMAIN=On
ninja all_fuzzers
cd ..
fi
# a fuzzer with sanitizers, built with avx disabled.
variant=ossfuzz-noavx
if [ ! -d build-$variant ] ; then
export CC=clang
export CXX="clang++"
export CFLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined -mno-avx2 -mno-avx "
export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined -mno-avx2 -mno-avx"
export LIB_FUZZING_ENGINE="-fsanitize=fuzzer"
# A fuzzer with sanitizers. For improved capability to find bugs.
variant=sanitizers
if [ ! -d build-$variant ] ; then
mkdir build-$variant
cd build-$variant
cmake .. \
-GNinja \
$COMMON \
-DCMAKE_CXX_FLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined" \
-DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined" \
-DCMAKE_BUILD_TYPE=Debug \
-DSIMDJSON_BUILD_STATIC=On \
-DENABLE_FUZZING=On \
-DSIMDJSON_COMPETITION=OFF \
-DSIMDJSON_GOOGLE_BENCHMARKS=OFF \
-DSIMDJSON_FUZZ_LINKMAIN=Off \
-DSIMDJSON_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE \
-DSIMDJSON_IMPLEMENTATION_HASWELL=0
-DSIMDJSON_FUZZ_LDFLAGS="-fsanitize=fuzzer"
ninja all_fuzzers
cd ..
fi
# a fuzzer with sanitizers, built with avx disabled.
variant=ossfuzz-noavx9
if which clang++-9 >/dev/null 2>&1 ; then
if [ ! -d build-$variant ] ; then
export CC=clang-9
export CXX="clang++-9"
export CFLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined -mno-avx2 -mno-avx "
export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined -mno-avx2 -mno-avx"
export LIB_FUZZING_ENGINE="-fsanitize=fuzzer"
# A fast fuzzer, for fast exploration rather than finding bugs.
variant=fast
if [ ! -d build-$variant ] ; then
mkdir build-$variant
cd build-$variant
cmake .. \
-GNinja \
-DCMAKE_BUILD_TYPE=Debug \
-DSIMDJSON_BUILD_STATIC=On \
-DENABLE_FUZZING=On \
-DSIMDJSON_COMPETITION=OFF \
-DSIMDJSON_GOOGLE_BENCHMARKS=OFF \
$COMMON \
-DCMAKE_CXX_FLAGS="-fsanitize=fuzzer-no-link" \
-DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link" \
-DCMAKE_BUILD_TYPE=Release \
-DSIMDJSON_FUZZ_LINKMAIN=Off \
-DSIMDJSON_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE \
-DSIMDJSON_IMPLEMENTATION_HASWELL=0
-DSIMDJSON_FUZZ_LDFLAGS="-fsanitize=fuzzer"
ninja all_fuzzers
cd ..
fi
else
echo "$me: WARNING clang++-9 not found, please install it to build $variant"
fi
# a fuzzer with sanitizers, default built
variant=ossfuzz-withavx
if [ ! -d build-$variant ] ; then
export CC=clang
export CXX="clang++"
export CFLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined"
export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined"
export LIB_FUZZING_ENGINE="-fsanitize=fuzzer"
mkdir build-$variant
cd build-$variant
cmake .. \
-GNinja \
-DCMAKE_BUILD_TYPE=Debug \
-DSIMDJSON_BUILD_STATIC=On \
-DENABLE_FUZZING=On \
-DSIMDJSON_COMPETITION=OFF \
-DSIMDJSON_GOOGLE_BENCHMARKS=OFF \
-DSIMDJSON_FUZZ_LINKMAIN=Off \
-DSIMDJSON_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE
ninja all_fuzzers
cd ..
fi
# a fast fuzzer, for fast exploration
variant=ossfuzz-fast9
if which clang++-9 >/dev/null 2>&1 ; then
if [ ! -d build-$variant ] ; then
export CC=clang-9
export CXX="clang++-9"
export CFLAGS="-fsanitize=fuzzer-no-link -O3 -g"
export CXXFLAGS="-fsanitize=fuzzer-no-link -O3 -g"
export LIB_FUZZING_ENGINE="-fsanitize=fuzzer"
mkdir build-$variant
cd build-$variant
cmake .. \
-GNinja \
-DCMAKE_BUILD_TYPE= \
-DSIMDJSON_BUILD_STATIC=On \
-DENABLE_FUZZING=On \
-DSIMDJSON_COMPETITION=OFF \
-DSIMDJSON_GOOGLE_BENCHMARKS=OFF \
-DSIMDJSON_FUZZ_LINKMAIN=Off \
-DSIMDJSON_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE
ninja all_fuzzers
cd ..
fi
else
echo "$me: WARNING clang++-9 not found, please install it to build $variant"
fi

View File

@ -14,7 +14,7 @@ export OUT=$(pwd)/ossfuzz-out
export CC=clang
export CXX="clang++"
export CFLAGS="-fsanitize=fuzzer-no-link"
export CXXFLAGS="-fsanitize=fuzzer-no-link"
export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -O3"
export LIB_FUZZING_ENGINE="-fsanitize=fuzzer"
$ossfuzz

View File

@ -52,7 +52,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
simdjson::dom::element elem;
auto error = parser.parse(Data, Size).get(elem);
if (error) { return 1; }
if (error) { return 0; }
NulOStream os;
//std::ostream& os(std::cout);
print_json(os,elem);

View File

@ -10,7 +10,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
simdjson::dom::parser parser;
simdjson::dom::element elem;
auto error = parser.parse(Data, Size).get(elem);
if (error) { return 1; }
if (error) { return 0; }
NulOStream os;
SIMDJSON_UNUSED auto dumpstatus = elem.dump_raw_tape(os);

View File

@ -0,0 +1,119 @@
/*
* For fuzzing all of the implementations (haswell/fallback/westmere),
* finding any difference between the output of each which would
* indicate inconsistency. Also, it gets the non-default backend
* some fuzzing love.
*
* Copyright Paul Dreik 20200909 for the simdjson project.
*/
#include "simdjson.h"
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>
// Bundles one implementation together with its own parser and every
// intermediate result (parsed element, error code, serialized output).
// Keeping all of it around makes things easier to debug in case this
// fuzzer ever catches anything.
struct Impl {
  // im: the implementation this entry represents; defaults to nullptr
  // so that an array of Impl can be default constructed and filled in later.
  explicit Impl(const simdjson::implementation* im=nullptr)
    : impl(im),
      parser(),
      element(),
      error(),
      output()
  {}

  // copying is disabled (this also silences -Weffc++)
  Impl(const Impl&)=delete;
  Impl& operator=(const Impl&)=delete;

  // the implementation used for parsing, or nullptr if the entry is unused
  const simdjson::implementation* impl;
  // parser used for this implementation
  simdjson::dom::parser parser;
  // result of a successful parse
  simdjson::dom::element element;
  // error code reported by the parse
  simdjson::error_code error;
  // the parsed element turned into text again
  std::string output;
};
// Prints, for every entry in [first,last), the name of the implementation
// and the error code it produced to std::cerr, then terminates the
// process with std::abort(). Never returns.
template<class Iterator>
void showErrorAndAbort(Iterator first, Iterator last) {
  for(auto cur=first; cur!=last; ++cur) {
    const auto& entry=*cur;
    std::cerr<<"Implementation: "<<entry.impl->name()<<"\tError:"<<entry.error<<'\n';
  }
  // make sure the report is written out before the process dies
  std::cerr.flush();
  std::abort();
}
// Prints the textual output of every entry in [first,last) to std::cerr,
// followed by a line for each distinct pair of entries stating whether
// their outputs match, then terminates the process with std::abort().
// Never returns.
template<class Iterator>
void showOutputAndAbort(Iterator first, Iterator last) {
  for(auto cur=first; cur!=last; ++cur) {
    const auto& entry=*cur;
    std::cerr<<"Implementation: "<<entry.impl->name()<<"\tOutput: "<<entry.output<<'\n';
  }

  // show the pairwise results: every entry against each entry after it
  for(auto lhs=first; lhs!=last; ++lhs) {
    auto rhs=lhs;
    for(++rhs; rhs!=last; ++rhs) {
      const char* verdict=" do NOT match.";
      if(lhs->output==rhs->output) {
        verdict=" match.";
      }
      std::cerr<<"Implementation "<<lhs->impl->name()<<" and "<<rhs->impl->name()<<verdict<<'\n';
    }
  }

  // make sure the report is written out before the process dies
  std::cerr.flush();
  std::abort();
}
// Fuzzer entry point. Parses the input once with every implementation
// listed in simdjson::available_implementations and aborts if they
// disagree, that is, if
//  - some implementations report an error while others succeed, or
//  - all succeed but the parsed data, turned into text, differs.
// Data/Size: the input to parse.
// Always returns 0 (when it returns at all).
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
  // make this dynamic, so it works regardless of how it was compiled
  // or what hardware it runs on
  constexpr std::size_t Nimplementations_max=3;
  const std::size_t Nimplementations=simdjson::available_implementations.size();
  if(Nimplementations>Nimplementations_max) {
    //there is another backend added, please bump Nimplementations_max!
    std::abort();
  }

  // get pointers to the backend implementation
  std::array<Impl,Nimplementations_max> implementations;
  {
    std::size_t i=0;
    for(auto& e: simdjson::available_implementations) {
      implementations[i++].impl=e;
    }
  }

  // Only the first Nimplementations entries were filled in above. The
  // remaining ones still hold impl==nullptr and must never be made the
  // active implementation, so every loop below is restricted to this range.
  const auto first=implementations.begin();
  const auto last=implementations.begin()+Nimplementations;

  // let each implementation parse and store the result
  std::size_t nerrors=0;
  for(auto it=first; it!=last; ++it) {
    auto& e=*it;
    simdjson::active_implementation=e.impl;
    e.error=e.parser.parse(Data,Size).get(e.element);
    if(e.error) {
      ++nerrors;
    } else {
      std::ostringstream oss;
      oss<<e.element;
      e.output=oss.str();
    }
  }

  //we should either have no errors, or all should error
  if(nerrors!=0) {
    if(nerrors!=Nimplementations) {
      showErrorAndAbort(first,last);
    }
    return 0;
  }

  //parsing went well for all. compare the output against the first.
  const std::string& reference=implementations[0].output;
  for(std::size_t i=1; i<Nimplementations; ++i) {
    if(implementations[i].output!=reference) {
      showOutputAndAbort(first,last);
    }
  }

  //all is well
  return 0;
}

View File

@ -11,7 +11,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
simdjson::dom::parser parser;
simdjson::dom::element elem;
auto error = parser.parse(str).get(elem);
if (error) { return 1; }
if (error) { return 0; }
std::string minified=simdjson::minify(elem);
(void)minified;