simplify fuzzing only dynamically supported implementations (#1201)

This refactors the dynamic check of which implementations are supported at runtime.

It also reduces duplicated effort in the CI fuzzing job: the differential fuzzers don't need to run with different values of SIMDJSON_FORCE_IMPLEMENTATION.

There is also a convenience script to run the fuzzers locally, to quickly check that the fuzzers still build and run, and that there are no easy-to-find bugs. It should be handy not only when developing the fuzzers, but also when modifying simdjson.
This commit is contained in:
Paul Dreik 2020-10-09 05:29:54 +02:00 committed by GitHub
parent 1f98e64b71
commit 8a68163905
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 133 additions and 45 deletions

View File

@ -12,7 +12,10 @@ jobs:
build:
runs-on: ubuntu-latest
env:
allfuzzers: atpointer dump dump_raw_tape implementations minify minifyimpl parser print_json utf8
# fuzzers that use the default implementation
defaultimplfuzzers: atpointer dump dump_raw_tape minify parser print_json
# fuzzers that loop over the implementations themselves
implfuzzers: implementations minifyimpl utf8
implementations: haswell westmere fallback
UBSAN_OPTIONS: halt_on_error=1
MAXLEN: -max_len=4000
@ -50,20 +53,20 @@ jobs:
- name: Build all the variants
run: fuzz/build_fuzzer_variants.sh
- name: Run the fast fuzzer (release build, default implementation, to explore fast)
- name: Explore fast (release build, default implementation)
run: |
set -eux
for fuzzer in $allfuzzers; do
for fuzzer in $defaultimplfuzzers $implfuzzers; do
mkdir -p out/$fuzzer # in case this is a new fuzzer, or corpus.tar is broken
# get input from everyone else (corpus cross pollination)
others=$(find out -type d -not -name $fuzzer -not -name out -not -name cmin)
build-fast/fuzz/fuzz_$fuzzer out/$fuzzer $others seedcorpus -max_total_time=30 $MAXLEN
done
- name: Run the slow fuzzer (sanitizer+asserts, good at detecting errors)
- name: Fuzz default impl. fuzzers with sanitizer+asserts (good at detecting errors)
run: |
set -eux
for fuzzer in $allfuzzers; do
for fuzzer in $defaultimplfuzzers; do
# get input from everyone else (corpus cross pollination)
others=$(find out -type d -not -name $fuzzer -not -name out -not -name cmin)
for implementation in $implementations; do
@ -73,10 +76,20 @@ jobs:
echo now have $(ls out/$fuzzer |wc -l) files in corpus
done
- name: Fuzz differential impl. fuzzers with sanitizer+asserts (good at detecting errors)
run: |
set -eux
for fuzzer in $implfuzzers; do
# get input from everyone else (corpus cross pollination)
others=$(find out -type d -not -name $fuzzer -not -name out -not -name cmin)
build-sanitizers/fuzz/fuzz_$fuzzer out/$fuzzer $others seedcorpus -max_total_time=20 $MAXLEN
echo now have $(ls out/$fuzzer |wc -l) files in corpus
done
- name: Minimize the corpus with the fast fuzzer on the default implementation
run: |
set -eux
for fuzzer in $allfuzzers; do
for fuzzer in $defaultimplfuzzers $implfuzzers; do
mkdir -p out/cmin/$fuzzer
# get input from everyone else (corpus cross pollination)
others=$(find out -type d -not -name $fuzzer -not -name out -not -name cmin)
@ -102,7 +115,7 @@ jobs:
# which are hashes of the content.
- name: Run some of the minimized corpus through valgrind (replay build, default implementation)
run: |
for fuzzer in $allfuzzers; do
for fuzzer in $defaultimplfuzzers $implfuzzers; do
find out/$fuzzer -type f |sort|head -n200|xargs -n40 valgrind build-replay/fuzz/fuzz_$fuzzer 2>&1|tee valgrind-$fuzzer.txt
done

View File

@ -13,7 +13,8 @@ unset CXX CC CFLAGS CXXFLAGS LDFLAGS
me=$(basename $0)
# common options
COMMON="-GNinja -DCMAKE_CXX_COMPILER=clang++-9 -DCMAKE_C_COMPILER=clang-9 -DSIMDJSON_BUILD_STATIC=On -DENABLE_FUZZING=On -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_GIT=Off"
CLANGVER=-9
COMMON="-GNinja -DCMAKE_CXX_COMPILER=clang++$CLANGVER -DCMAKE_C_COMPILER=clang$CLANGVER -DSIMDJSON_BUILD_STATIC=Off -DENABLE_FUZZING=On -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_GIT=Off"
# A replay build, as plain as it gets. For use with valgrind/gdb.
variant=replay

View File

@ -13,6 +13,7 @@
#include <cstdlib>
#include <string>
#include <array>
#include "supported_implementations.h"
// store each implementation along with its intermediate results,
@ -64,16 +65,15 @@ void showOutputAndAbort(Iterator first, Iterator last) {
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
// since this check is expensive, only do it once
static const auto supported_implementations=get_runtime_supported_implementations();
// make this dynamic, so it works regardless of how it was compiled
// or what hardware it runs on
constexpr std::size_t Nimplementations_max=3;
std::size_t Nimplementations = 0;
const std::size_t Nimplementations = supported_implementations.size();
for(auto impl : simdjson::available_implementations) {
if(impl->supported_by_runtime_system()) {
Nimplementations++;
}
}
if(Nimplementations>Nimplementations_max) {
//there is another backend added, please bump Nimplementations_max!
std::abort();
@ -83,10 +83,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
std::array<Impl,Nimplementations_max> implementations;
{
std::size_t i=0;
for(auto& e: simdjson::available_implementations) {
if(e->supported_by_runtime_system()) {
for(auto& e: supported_implementations) {
implementations[i++].impl=e;
}
}
}

View File

@ -13,9 +13,13 @@
#include <cstddef>
#include <cstdlib>
#include <vector>
#include "supported_implementations.h"
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
// since this check is expensive, only do it once
static const auto implementations=get_runtime_supported_implementations();
using Buffer=std::vector<uint8_t>;
auto minify=[Data,Size](const simdjson::implementation* impl) -> Buffer {
Buffer ret(Size);
@ -31,20 +35,13 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
return ret;
};
auto const first = simdjson::available_implementations.begin();
auto const last = simdjson::available_implementations.end();
auto it = first;
while((it != last) && (!(*it)->supported_by_runtime_system())) { it++; }
assert(it != last);
auto const first = implementations.begin();
auto const last = implementations.end();
const auto reference=minify(*first);
bool failed=false;
for(;it != last; ++it) {
if(!(*it)->supported_by_runtime_system()) { continue; }
for(auto it=first+1;it != last; ++it) {
const auto current=minify(*it);
if(current!=reference) {
failed=true;
@ -53,11 +50,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
if(failed) {
std::cerr<<std::boolalpha<<"Mismatch between implementations of minify() found:\n";
for(it = first;it != last; ++it) {
if(!(*it)->supported_by_runtime_system()) { continue; }
const auto current=minify(*it);
for(const auto& e:implementations) {
const auto current=minify(e);
std::string tmp(current.begin(),current.end());
std::cerr<<(*it)->name()<<" returns "<<tmp<<std::endl;
std::cerr<<e->name()<<" returns "<<tmp<<std::endl;
}
std::abort();
}

View File

@ -10,28 +10,28 @@
#include "simdjson.h"
#include <cstddef>
#include <cstdlib>
#include "supported_implementations.h"
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
// since this check is expensive, only do it once
static const auto supported_implementations=get_runtime_supported_implementations();
auto utf8verify=[Data,Size](const simdjson::implementation* impl) -> bool {
return impl->validate_utf8((const char*)Data,Size);
};
auto first = simdjson::available_implementations.begin();
auto last = simdjson::available_implementations.end();
auto first = supported_implementations.begin();
auto last = supported_implementations.end();
auto it = first;
while((it != last) && (!(*it)->supported_by_runtime_system())) { it++; }
assert(it != last);
const bool reference=utf8verify(*it);
const bool reference=utf8verify(*first);
bool failed=false;
for(; it != last; ++it) {
if(!(*it)->supported_by_runtime_system()) { continue; }
for(auto it=first+1; it != last; ++it) {
const bool current=utf8verify(*it);
if(current!=reference) {
failed=true;
@ -40,10 +40,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
if(failed) {
std::cerr<<std::boolalpha<<"Mismatch between implementations of validate_utf8() found:\n";
for(it = first;it != last; ++it) {
if(!(*it)->supported_by_runtime_system()) { continue; }
const bool current=utf8verify(*it);
std::cerr<<(*it)->name()<<" returns "<<current<<std::endl;
for(const auto& e: supported_implementations) {
if(!e->supported_by_runtime_system()) { continue; }
const bool current=utf8verify(e);
std::cerr<<e->name()<<" returns "<<current<<std::endl;
}
std::abort();
}

50
fuzz/quick_check.sh Executable file
View File

@ -0,0 +1,50 @@
#!/bin/sh
#
# This script is to make a quick check that the fuzzers work,
# good when working locally developing the fuzzers or making
# sure code changes still pass the fuzzers.
#
# It will download the corpus from bintray (kept up to date
# by the crontab github actions) unless a local out/ directory
# already exists.
#
# Run it standing in the root of the simdjson repository.
#
# By Paul Dreik 20201003

# -e: stop at the first failing command, -u: treat unset variables as errors
set -eu

# Make sure the required tools exist before doing any work.
# "command -v" is used instead of "which" because it is specified by POSIX
# and is a shell builtin, so it works even where "which" is not installed.
for prog in wget tar cmake; do
    if ! command -v "$prog" >/dev/null 2>&1; then
        echo please install $prog
        exit 1
    fi
done

#download the corpus if it does not already exist
if [ ! -d out ] ; then
    wget --quiet https://dl.bintray.com/pauldreik/simdjson-fuzz-corpus/corpus/corpus.tar
    # Deliberately two separate commands: in "tar ... && rm ...", a failing
    # tar would not trigger "set -e" (only the last command of an AND-OR list
    # does), and the script would continue with a missing or partial corpus.
    tar xf corpus.tar
    rm corpus.tar
fi

# Build everything on the first run, otherwise only rebuild the fuzzers.
builddir=build-sanitizers
if [ ! -d $builddir ] ; then
    fuzz/build_fuzzer_variants.sh
else
    cmake --build $builddir --target all_fuzzers
fi

# The last line of output from the print_all_fuzzernames target is taken
# as the whitespace separated list of fuzzer executables.
fuzzernames=$(cmake --build $builddir --target print_all_fuzzernames |tail -n1)

# The exit status of the pipeline above is the one of tail, so a failing
# cmake goes unnoticed by "set -e". Refuse to report success if no fuzzer
# was found at all.
if [ -z "$fuzzernames" ] ; then
    echo "could not determine the fuzzer names from the $builddir build"
    exit 1
fi

for fuzzer in $fuzzernames ; do
    exe=$builddir/fuzz/$fuzzer
    # strip everything up to and including the first underscore (fuzz_xyz -> xyz)
    shortname=$(echo $fuzzer |cut -f2- -d_)
    echo found fuzzer $shortname with executable $exe
    mkdir -p out/$shortname
    # corpus directories of all the other fuzzers (corpus cross pollination);
    # left unquoted below on purpose so each directory becomes its own argument
    others=$(find out -type d -not -name $shortname -not -name out -not -name cmin)
    $exe -max_total_time=20 -max_len=4000 out/$shortname $others
    echo "*************************************************************************"
done
echo "all is good, no errors found in any of these fuzzers: $fuzzernames"

View File

@ -0,0 +1,30 @@
#pragma once
#include "simdjson.h"
#include <vector>
#include <cstdlib>
/**
 * @brief get_runtime_supported_implementations
 * Returns a vector of implementations, which both
 * have been compiled *and* are dynamically checked to
 * be supported at runtime (supported_by_runtime_system()).
 *
 * The function is declared inline because it is defined in a header:
 * without it, including this header from more than one translation unit
 * of the same program results in multiple definitions of the function.
 *
 * Aborts if no implementations are available (should not happen, fallback
 * should always be there for us!)
 * @return the supported implementations, in the same order as they appear
 * in simdjson::available_implementations. Never empty.
 */
inline std::vector<const simdjson::implementation*>
get_runtime_supported_implementations() {
  std::vector<const simdjson::implementation*> ret;
  for(const auto& e: simdjson::available_implementations) {
    if(e->supported_by_runtime_system()) {
      ret.emplace_back(e);
    }
  }
  if(ret.empty()) {
    // No implementations available, not even fallback, weird.
    std::abort();
  }
  return ret;
}