add minifier fuzzers (#1172)
This adds a minifier fuzzer. There is also a UTF-8 fuzzer, but it is disabled until #1187 is fixed. Run all fuzzers but the UTF-8 one in the GitHub CI fuzz job.
This commit is contained in:
parent
60c139a844
commit
f44386008c
|
@ -12,7 +12,7 @@ jobs:
|
|||
build:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
allfuzzers: atpointer dump dump_raw_tape parser print_json implementations
|
||||
allfuzzers: atpointer dump dump_raw_tape implementations minify minifyimpl parser print_json
|
||||
implementations: haswell westmere fallback
|
||||
UBSAN_OPTIONS: halt_on_error=1
|
||||
MAXLEN: -max_len=4000
|
||||
|
@ -24,7 +24,7 @@ jobs:
|
|||
sudo apt-get install --quiet ninja-build valgrind zip unzip
|
||||
wget https://apt.llvm.org/llvm.sh
|
||||
chmod +x llvm.sh
|
||||
sudo ./llvm.sh 9
|
||||
sudo ./llvm.sh 10
|
||||
|
||||
- uses: actions/checkout@v1
|
||||
|
||||
|
@ -92,7 +92,7 @@ jobs:
|
|||
done
|
||||
|
||||
- name: Save the corpus as a github artifact
|
||||
uses: actions/upload-artifact@v1
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: corpus
|
||||
path: corpus.tar
|
||||
|
|
|
@ -28,10 +28,6 @@ if(ENABLE_FUZZING)
|
|||
# the fuzz targets, otherwise the cmake configuration step fails.
|
||||
set(SIMDJSON_FUZZ_LDFLAGS "" CACHE STRING "LDFLAGS for the fuzz targets")
|
||||
|
||||
add_custom_target(print_all_fuzz_targets
|
||||
COMMAND ${CMAKE_COMMAND} -E echo ${SOURCES}
|
||||
)
|
||||
|
||||
# Fuzzer build flags and libraries
|
||||
add_library(simdjson-fuzzer INTERFACE)
|
||||
if (SIMDJSON_FUZZ_LINKMAIN)
|
||||
|
@ -46,20 +42,27 @@ if(ENABLE_FUZZING)
|
|||
# Define the fuzzers
|
||||
add_custom_target(all_fuzzers)
|
||||
|
||||
set(fuzzernames)
|
||||
# Declares one fuzz target called <name>, built from <name>.cpp.
# The target links against simdjson-fuzzer, becomes a dependency of
# all_fuzzers, is registered as a test labelled "fuzz", and has its name
# appended to the caller's fuzzernames list.
function(implement_fuzzer name)
  add_executable(${name} ${name}.cpp)
  target_link_libraries(${name} PRIVATE simdjson-fuzzer)
  add_dependencies(all_fuzzers ${name})
  add_test(${name} ${name})
  set_property(TEST ${name} APPEND PROPERTY LABELS fuzz)
  # Extend the local copy of the list, then publish it to the calling scope.
  list(APPEND fuzzernames ${name})
  set(fuzzernames ${fuzzernames} PARENT_SCOPE)
endfunction()
|
||||
|
||||
implement_fuzzer(fuzz_atpointer)
|
||||
implement_fuzzer(fuzz_dump)
|
||||
implement_fuzzer(fuzz_dump_raw_tape)
|
||||
implement_fuzzer(fuzz_implementations)
|
||||
implement_fuzzer(fuzz_minify)
|
||||
implement_fuzzer(fuzz_implementations) # parses and serializes again, compares across implementations
|
||||
implement_fuzzer(fuzz_minify) # minify *with* parsing
|
||||
implement_fuzzer(fuzz_minifyimpl) # minify *without* parsing, plus compare implementations
|
||||
implement_fuzzer(fuzz_parser)
|
||||
implement_fuzzer(fuzz_print_json)
|
||||
# wait for https://github.com/simdjson/simdjson/issues/1187 to be fixed before adding this back
|
||||
#implement_fuzzer(fuzz_utf8) # utf8 verification, compares across implementations
|
||||
|
||||
# to be able to get a list of all fuzzers from within a script
|
||||
add_custom_target(print_all_fuzzernames
|
||||
COMMAND ${CMAKE_COMMAND} -E echo ${fuzzernames})
|
||||
|
||||
endif()
|
||||
|
|
|
@ -14,9 +14,10 @@ export OUT=$(pwd)/ossfuzz-out
|
|||
export CC=clang
|
||||
export CXX="clang++"
|
||||
export CFLAGS="-fsanitize=fuzzer-no-link"
|
||||
export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -O3"
|
||||
export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -O1"
|
||||
export LIB_FUZZING_ENGINE="-fsanitize=fuzzer"
|
||||
|
||||
$ossfuzz
|
||||
|
||||
echo "look at the results in $OUT"
|
||||
|
||||
|
|
|
@ -3,6 +3,10 @@
|
|||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
/*
|
||||
* Minifies by first parsing, then minifying.
|
||||
*/
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
||||
|
||||
auto begin = as_chars(Data);
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Minifies using the minify() function directly, without parsing.
|
||||
*
|
||||
* For fuzzing all of the implementations (haswell/fallback/westmere),
|
||||
* finding any difference between the output of each which would
|
||||
* indicate inconsistency. Also, it gets the non-default backend
|
||||
* some fuzzing love.
|
||||
*
|
||||
* Copyright Paul Dreik 20200912 for the simdjson project.
|
||||
*/
|
||||
|
||||
#include "simdjson.h"
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
||||
|
||||
using Buffer=std::vector<uint8_t>;
|
||||
auto minify=[Data,Size](const simdjson::implementation* impl) -> Buffer {
|
||||
Buffer ret(Size);
|
||||
std::size_t retsize=0;
|
||||
auto err=impl->minify(Data,Size,ret.data(),retsize);
|
||||
if(err) {
|
||||
std::string tmp = error_message(err);
|
||||
ret.assign(tmp.begin(),tmp.end());
|
||||
} else {
|
||||
assert(retsize<=Size && "size should not grow by minimize()!");
|
||||
ret.resize(retsize);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
|
||||
auto first=simdjson::available_implementations.begin();
|
||||
auto last=simdjson::available_implementations.end();
|
||||
|
||||
//make sure there is an implementation
|
||||
assert(first!=last);
|
||||
|
||||
const auto reference=minify(*first);
|
||||
|
||||
bool failed=false;
|
||||
for(auto it=first+1;it!=last; ++it) {
|
||||
const auto current=minify(*it);
|
||||
if(current!=reference) {
|
||||
failed=true;
|
||||
}
|
||||
}
|
||||
|
||||
if(failed) {
|
||||
std::cerr<<std::boolalpha<<"Mismatch between implementations of minify() found:\n";
|
||||
for(auto it=first;it!=last; ++it) {
|
||||
const auto current=minify(*it);
|
||||
std::string tmp(current.begin(),current.end());
|
||||
std::cerr<<(*it)->name()<<" returns "<<tmp<<std::endl;
|
||||
}
|
||||
std::abort();
|
||||
}
|
||||
|
||||
//all is well
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* For fuzzing all of the implementations (haswell/fallback/westmere),
|
||||
* finding any difference between the output of each which would
|
||||
* indicate inconsistency. Also, it gets the non-default backend
|
||||
* some fuzzing love.
|
||||
*
|
||||
* Copyright Paul Dreik 20200912 for the simdjson project.
|
||||
*/
|
||||
|
||||
#include "simdjson.h"
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
||||
|
||||
auto utf8verify=[Data,Size](const simdjson::implementation* impl) -> bool {
|
||||
return impl->validate_utf8((const char*)Data,Size);
|
||||
};
|
||||
|
||||
|
||||
auto first=simdjson::available_implementations.begin();
|
||||
auto last=simdjson::available_implementations.end();
|
||||
|
||||
//make sure there is an implementation
|
||||
assert(first!=last);
|
||||
|
||||
const bool reference=utf8verify(*first);
|
||||
|
||||
bool failed=false;
|
||||
for(auto it=first+1;it!=last; ++it) {
|
||||
const bool current=utf8verify(*it);
|
||||
if(current!=reference) {
|
||||
failed=true;
|
||||
}
|
||||
}
|
||||
|
||||
if(failed) {
|
||||
std::cerr<<std::boolalpha<<"Mismatch between implementations of validate_utf8() found:\n";
|
||||
for(auto it=first;it!=last; ++it) {
|
||||
const bool current=utf8verify(*it);
|
||||
std::cerr<<(*it)->name()<<" returns "<<current<<std::endl;
|
||||
}
|
||||
std::abort();
|
||||
}
|
||||
|
||||
//all is well
|
||||
return 0;
|
||||
}
|
|
@ -7,9 +7,7 @@
|
|||
# invoke it from the git root.
|
||||
|
||||
# make sure to exit on problems
|
||||
set -e
|
||||
set -u
|
||||
set -x
|
||||
set -eux
|
||||
|
||||
for prog in zip cmake ninja; do
|
||||
if ! which $prog >/dev/null; then
|
||||
|
@ -32,13 +30,17 @@ cmake .. \
|
|||
-DENABLE_FUZZING=On \
|
||||
-DSIMDJSON_COMPETITION=Off \
|
||||
-DSIMDJSON_FUZZ_LINKMAIN=Off \
|
||||
-DSIMDJSON_GIT=Off \
|
||||
-DSIMDJSON_GOOGLE_BENCHMARKS=Off \
|
||||
-DSIMDJSON_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE
|
||||
|
||||
cmake --build . --target all_fuzzers
|
||||
|
||||
cp fuzz/fuzz_* $OUT
|
||||
|
||||
# all corpora are equal, they all take json as input
|
||||
# all fuzzers but one (the tiny target for utf8 validation) take json
|
||||
# as input, therefore use the same corpus of json files for all.
|
||||
for f in $(ls $OUT/fuzz* |grep -v '.zip$') ; do
|
||||
cp ../corpus.zip $OUT/$(basename $f).zip
|
||||
done
|
||||
|
||||
|
|
Loading…
Reference in New Issue