simplify fuzzing only dynamically supported implementations (#1201)
This refactors the dynamic check of which implementations are supported at runtime. It also reduces duplicated effort in the CI fuzzing job: the differential fuzzers don't need to run with different values of SIMDJSON_FORCE_IMPLEMENTATION. There is also a convenience script to run the fuzzers locally, to quickly check that the fuzzers still build and run, and that there are no easy-to-find bugs. It should be handy not only when developing the fuzzers, but also when modifying simdjson.
This commit is contained in:
parent
1f98e64b71
commit
8a68163905
|
@ -12,7 +12,10 @@ jobs:
|
|||
build:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
allfuzzers: atpointer dump dump_raw_tape implementations minify minifyimpl parser print_json utf8
|
||||
# fuzzers that use the default implementation
|
||||
defaultimplfuzzers: atpointer dump dump_raw_tape minify parser print_json
|
||||
# fuzzers that loop over the implementations themselves
|
||||
implfuzzers: implementations minifyimpl utf8
|
||||
implementations: haswell westmere fallback
|
||||
UBSAN_OPTIONS: halt_on_error=1
|
||||
MAXLEN: -max_len=4000
|
||||
|
@ -50,20 +53,20 @@ jobs:
|
|||
- name: Build all the variants
|
||||
run: fuzz/build_fuzzer_variants.sh
|
||||
|
||||
- name: Run the fast fuzzer (release build, default implementation, to explore fast)
|
||||
- name: Explore fast (release build, default implementation)
|
||||
run: |
|
||||
set -eux
|
||||
for fuzzer in $allfuzzers; do
|
||||
for fuzzer in $defaultimplfuzzers $implfuzzers; do
|
||||
mkdir -p out/$fuzzer # in case this is a new fuzzer, or corpus.tar is broken
|
||||
# get input from everyone else (corpus cross pollination)
|
||||
others=$(find out -type d -not -name $fuzzer -not -name out -not -name cmin)
|
||||
build-fast/fuzz/fuzz_$fuzzer out/$fuzzer $others seedcorpus -max_total_time=30 $MAXLEN
|
||||
done
|
||||
|
||||
- name: Run the slow fuzzer (sanitizer+asserts, good at detecting errors)
|
||||
- name: Fuzz default impl. fuzzers with sanitizer+asserts (good at detecting errors)
|
||||
run: |
|
||||
set -eux
|
||||
for fuzzer in $allfuzzers; do
|
||||
for fuzzer in $defaultimplfuzzers; do
|
||||
# get input from everyone else (corpus cross pollination)
|
||||
others=$(find out -type d -not -name $fuzzer -not -name out -not -name cmin)
|
||||
for implementation in $implementations; do
|
||||
|
@ -73,10 +76,20 @@ jobs:
|
|||
echo now have $(ls out/$fuzzer |wc -l) files in corpus
|
||||
done
|
||||
|
||||
- name: Fuzz differential impl. fuzzers with sanitizer+asserts (good at detecting errors)
|
||||
run: |
|
||||
set -eux
|
||||
for fuzzer in $implfuzzers; do
|
||||
# get input from everyone else (corpus cross pollination)
|
||||
others=$(find out -type d -not -name $fuzzer -not -name out -not -name cmin)
|
||||
build-sanitizers/fuzz/fuzz_$fuzzer out/$fuzzer $others seedcorpus -max_total_time=20 $MAXLEN
|
||||
echo now have $(ls out/$fuzzer |wc -l) files in corpus
|
||||
done
|
||||
|
||||
- name: Minimize the corpus with the fast fuzzer on the default implementation
|
||||
run: |
|
||||
set -eux
|
||||
for fuzzer in $allfuzzers; do
|
||||
for fuzzer in $defaultimplfuzzers $implfuzzers; do
|
||||
mkdir -p out/cmin/$fuzzer
|
||||
# get input from everyone else (corpus cross pollination)
|
||||
others=$(find out -type d -not -name $fuzzer -not -name out -not -name cmin)
|
||||
|
@ -102,7 +115,7 @@ jobs:
|
|||
# which are hashes of the content.
|
||||
- name: Run some of the minimized corpus through valgrind (replay build, default implementation)
|
||||
run: |
|
||||
for fuzzer in $allfuzzers; do
|
||||
for fuzzer in $defaultimplfuzzers $implfuzzers; do
|
||||
find out/$fuzzer -type f |sort|head -n200|xargs -n40 valgrind build-replay/fuzz/fuzz_$fuzzer 2>&1|tee valgrind-$fuzzer.txt
|
||||
done
|
||||
|
||||
|
|
|
@ -13,7 +13,8 @@ unset CXX CC CFLAGS CXXFLAGS LDFLAGS
|
|||
me=$(basename $0)
|
||||
|
||||
# common options
|
||||
COMMON="-GNinja -DCMAKE_CXX_COMPILER=clang++-9 -DCMAKE_C_COMPILER=clang-9 -DSIMDJSON_BUILD_STATIC=On -DENABLE_FUZZING=On -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_GIT=Off"
|
||||
CLANGVER=-9
|
||||
COMMON="-GNinja -DCMAKE_CXX_COMPILER=clang++$CLANGVER -DCMAKE_C_COMPILER=clang$CLANGVER -DSIMDJSON_BUILD_STATIC=Off -DENABLE_FUZZING=On -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_GIT=Off"
|
||||
|
||||
# A replay build, as plain as it gets. For use with valgrind/gdb.
|
||||
variant=replay
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <array>
|
||||
#include "supported_implementations.h"
|
||||
|
||||
|
||||
// store each implementation along with its intermediate results,
|
||||
|
@ -64,16 +65,15 @@ void showOutputAndAbort(Iterator first, Iterator last) {
|
|||
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
||||
|
||||
// since this check is expensive, only do it once
|
||||
static const auto supported_implementations=get_runtime_supported_implementations();
|
||||
|
||||
|
||||
// make this dynamic, so it works regardless of how it was compiled
|
||||
// or what hardware it runs on
|
||||
constexpr std::size_t Nimplementations_max=3;
|
||||
std::size_t Nimplementations = 0;
|
||||
const std::size_t Nimplementations = supported_implementations.size();
|
||||
|
||||
for(auto impl : simdjson::available_implementations) {
|
||||
if(impl->supported_by_runtime_system()) {
|
||||
Nimplementations++;
|
||||
}
|
||||
}
|
||||
if(Nimplementations>Nimplementations_max) {
|
||||
//there is another backend added, please bump Nimplementations_max!
|
||||
std::abort();
|
||||
|
@ -83,10 +83,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
|||
std::array<Impl,Nimplementations_max> implementations;
|
||||
{
|
||||
std::size_t i=0;
|
||||
for(auto& e: simdjson::available_implementations) {
|
||||
if(e->supported_by_runtime_system()) {
|
||||
for(auto& e: supported_implementations) {
|
||||
implementations[i++].impl=e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -13,9 +13,13 @@
|
|||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
#include "supported_implementations.h"
|
||||
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
||||
|
||||
// since this check is expensive, only do it once
|
||||
static const auto implementations=get_runtime_supported_implementations();
|
||||
|
||||
using Buffer=std::vector<uint8_t>;
|
||||
auto minify=[Data,Size](const simdjson::implementation* impl) -> Buffer {
|
||||
Buffer ret(Size);
|
||||
|
@ -31,20 +35,13 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
|||
return ret;
|
||||
};
|
||||
|
||||
|
||||
auto const first = simdjson::available_implementations.begin();
|
||||
auto const last = simdjson::available_implementations.end();
|
||||
|
||||
|
||||
auto it = first;
|
||||
while((it != last) && (!(*it)->supported_by_runtime_system())) { it++; }
|
||||
assert(it != last);
|
||||
auto const first = implementations.begin();
|
||||
auto const last = implementations.end();
|
||||
|
||||
const auto reference=minify(*first);
|
||||
|
||||
bool failed=false;
|
||||
for(;it != last; ++it) {
|
||||
if(!(*it)->supported_by_runtime_system()) { continue; }
|
||||
for(auto it=first+1;it != last; ++it) {
|
||||
const auto current=minify(*it);
|
||||
if(current!=reference) {
|
||||
failed=true;
|
||||
|
@ -53,11 +50,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
|||
|
||||
if(failed) {
|
||||
std::cerr<<std::boolalpha<<"Mismatch between implementations of minify() found:\n";
|
||||
for(it = first;it != last; ++it) {
|
||||
if(!(*it)->supported_by_runtime_system()) { continue; }
|
||||
const auto current=minify(*it);
|
||||
for(const auto& e:implementations) {
|
||||
const auto current=minify(e);
|
||||
std::string tmp(current.begin(),current.end());
|
||||
std::cerr<<(*it)->name()<<" returns "<<tmp<<std::endl;
|
||||
std::cerr<<e->name()<<" returns "<<tmp<<std::endl;
|
||||
}
|
||||
std::abort();
|
||||
}
|
||||
|
|
|
@ -10,28 +10,28 @@
|
|||
#include "simdjson.h"
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include "supported_implementations.h"
|
||||
|
||||
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
||||
|
||||
// since this check is expensive, only do it once
|
||||
static const auto supported_implementations=get_runtime_supported_implementations();
|
||||
|
||||
|
||||
auto utf8verify=[Data,Size](const simdjson::implementation* impl) -> bool {
|
||||
return impl->validate_utf8((const char*)Data,Size);
|
||||
};
|
||||
|
||||
|
||||
auto first = simdjson::available_implementations.begin();
|
||||
auto last = simdjson::available_implementations.end();
|
||||
auto first = supported_implementations.begin();
|
||||
auto last = supported_implementations.end();
|
||||
|
||||
|
||||
auto it = first;
|
||||
while((it != last) && (!(*it)->supported_by_runtime_system())) { it++; }
|
||||
assert(it != last);
|
||||
|
||||
|
||||
const bool reference=utf8verify(*it);
|
||||
const bool reference=utf8verify(*first);
|
||||
|
||||
bool failed=false;
|
||||
for(; it != last; ++it) {
|
||||
if(!(*it)->supported_by_runtime_system()) { continue; }
|
||||
for(auto it=first+1; it != last; ++it) {
|
||||
const bool current=utf8verify(*it);
|
||||
if(current!=reference) {
|
||||
failed=true;
|
||||
|
@ -40,10 +40,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
|||
|
||||
if(failed) {
|
||||
std::cerr<<std::boolalpha<<"Mismatch between implementations of validate_utf8() found:\n";
|
||||
for(it = first;it != last; ++it) {
|
||||
if(!(*it)->supported_by_runtime_system()) { continue; }
|
||||
const bool current=utf8verify(*it);
|
||||
std::cerr<<(*it)->name()<<" returns "<<current<<std::endl;
|
||||
for(const auto& e: supported_implementations) {
|
||||
if(!e->supported_by_runtime_system()) { continue; }
|
||||
const bool current=utf8verify(e);
|
||||
std::cerr<<e->name()<<" returns "<<current<<std::endl;
|
||||
}
|
||||
std::abort();
|
||||
}
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
#!/bin/sh
#
# This script makes a quick check that the fuzzers work. It is
# useful when developing the fuzzers locally, or for making
# sure code changes still pass the fuzzers.
#
# It will download the corpus from bintray (kept up to date
# by the crontab github actions) unless a local out/ directory
# already exists.
#
# Run it standing in the root of the simdjson repository.
#
# By Paul Dreik 20201003

set -eu

# Verify the required tools exist. "command -v" is the POSIX way to
# look up a program; "which" is not guaranteed to be installed.
for prog in wget tar cmake; do
    if ! command -v "$prog" >/dev/null; then
        echo "please install $prog"
        exit 1
    fi
done

# download the corpus if it does not already exist
if [ ! -d out ] ; then
    wget --quiet https://dl.bintray.com/pauldreik/simdjson-fuzz-corpus/corpus/corpus.tar
    tar xf corpus.tar && rm corpus.tar
fi

builddir=build-sanitizers

# Build everything from scratch the first time, otherwise just
# rebuild the fuzzers in the existing build directory.
if [ ! -d "$builddir" ] ; then
    fuzz/build_fuzzer_variants.sh
else
    cmake --build "$builddir" --target all_fuzzers
fi

# The last line printed by the print_all_fuzzernames target is taken
# to be the whitespace separated list of fuzzer executables.
fuzzernames=$(cmake --build "$builddir" --target print_all_fuzzernames | tail -n1)

# $fuzzernames is intentionally unquoted: it is split into one word per fuzzer.
for fuzzer in $fuzzernames ; do
    exe="$builddir/fuzz/$fuzzer"
    # strip everything up to and including the first underscore (fuzz_parser -> parser)
    shortname=$(echo "$fuzzer" | cut -f2- -d_)
    echo "found fuzzer $shortname with executable $exe"
    mkdir -p "out/$shortname"
    # get input from everyone else (corpus cross pollination)
    others=$(find out -type d -not -name "$shortname" -not -name out -not -name cmin)
    # $others is intentionally unquoted: each directory must become its own argument.
    "$exe" -max_total_time=20 -max_len=4000 "out/$shortname" $others
    echo "*************************************************************************"
done
echo "all is good, no errors found in any of these fuzzers: $fuzzernames"
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
#pragma once
|
||||
|
||||
#include "simdjson.h"
|
||||
#include <vector>
|
||||
#include <cstdlib>
|
||||
|
||||
/**
|
||||
* @brief get_runtime_supported_implementations
|
||||
* Returns a vector of implementations, which both
|
||||
* have been compiled *and* are dynamically checked to
|
||||
* be supported at runtime.
|
||||
*
|
||||
* Aborts if no implementations are available (should not happen, fallback
|
||||
* should always be there for us!)
|
||||
* @return
|
||||
*/
|
||||
std::vector<const simdjson::implementation*>
|
||||
get_runtime_supported_implementations() {
|
||||
std::vector<const simdjson::implementation*> ret;
|
||||
for(auto& e: simdjson::available_implementations) {
|
||||
if(e->supported_by_runtime_system()) {
|
||||
ret.emplace_back(e);
|
||||
}
|
||||
}
|
||||
if(ret.empty()) {
|
||||
// No implementations available, not even fallback, weird.
|
||||
std::abort();
|
||||
}
|
||||
return ret;
|
||||
}
|
Loading…
Reference in New Issue