add minifier fuzzers (#1172)

This adds a minifier fuzzer. There is also an utf-8 fuzzer, but it is disabled until  #1187 is fixed.

Run all fuzzers bug the utf-8 one in the github CI fuzz.
This commit is contained in:
Paul Dreik 2020-09-26 14:25:00 +02:00 committed by GitHub
parent 60c139a844
commit f44386008c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 137 additions and 16 deletions

View File

@ -12,7 +12,7 @@ jobs:
build:
runs-on: ubuntu-latest
env:
allfuzzers: atpointer dump dump_raw_tape parser print_json implementations
allfuzzers: atpointer dump dump_raw_tape implementations minify minifyimpl parser print_json
implementations: haswell westmere fallback
UBSAN_OPTIONS: halt_on_error=1
MAXLEN: -max_len=4000
@ -24,7 +24,7 @@ jobs:
sudo apt-get install --quiet ninja-build valgrind zip unzip
wget https://apt.llvm.org/llvm.sh
chmod +x llvm.sh
sudo ./llvm.sh 9
sudo ./llvm.sh 10
- uses: actions/checkout@v1
@ -92,7 +92,7 @@ jobs:
done
- name: Save the corpus as a github artifact
uses: actions/upload-artifact@v1
uses: actions/upload-artifact@v2
with:
name: corpus
path: corpus.tar

View File

@ -28,10 +28,6 @@ if(ENABLE_FUZZING)
# the fuzz targets, otherwise the cmake configuration step fails.
set(SIMDJSON_FUZZ_LDFLAGS "" CACHE STRING "LDFLAGS for the fuzz targets")
add_custom_target(print_all_fuzz_targets
COMMAND ${CMAKE_COMMAND} -E echo ${SOURCES}
)
# Fuzzer build flags and libraries
add_library(simdjson-fuzzer INTERFACE)
if (SIMDJSON_FUZZ_LINKMAIN)
@ -46,20 +42,27 @@ if(ENABLE_FUZZING)
# Define the fuzzers
add_custom_target(all_fuzzers)
set(fuzzernames)
function(implement_fuzzer name)
add_executable(${name} ${name}.cpp)
target_link_libraries(${name} PRIVATE simdjson-fuzzer)
add_dependencies(all_fuzzers ${name})
add_test(${name} ${name})
set_property(TEST ${name} APPEND PROPERTY LABELS fuzz)
set(fuzzernames ${fuzzernames} ${name} PARENT_SCOPE)
endfunction()
implement_fuzzer(fuzz_atpointer)
implement_fuzzer(fuzz_dump)
implement_fuzzer(fuzz_dump_raw_tape)
implement_fuzzer(fuzz_implementations)
implement_fuzzer(fuzz_minify)
implement_fuzzer(fuzz_implementations) # parses and serializes again, compares across implementations
implement_fuzzer(fuzz_minify) # minify *with* parsing
implement_fuzzer(fuzz_minifyimpl) # minify *without* parsing, plus compare implementations
implement_fuzzer(fuzz_parser)
implement_fuzzer(fuzz_print_json)
# wait for https://github.com/simdjson/simdjson/issues/1187 to be fixed before adding this back
#implement_fuzzer(fuzz_utf8) # utf8 verification, compares across implementations
# to be able to get a list of all fuzzers from within a script
add_custom_target(print_all_fuzzernames
COMMAND ${CMAKE_COMMAND} -E echo ${fuzzernames})
endif()

View File

@ -14,9 +14,10 @@ export OUT=$(pwd)/ossfuzz-out
export CC=clang
export CXX="clang++"
export CFLAGS="-fsanitize=fuzzer-no-link"
export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -O3"
export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -O1"
export LIB_FUZZING_ENGINE="-fsanitize=fuzzer"
$ossfuzz
echo "look at the results in $OUT"

View File

@ -3,6 +3,10 @@
#include <cstddef>
#include <cstdint>
#include <string>
/*
* Minifies by first parsing, then minifying.
*/
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
auto begin = as_chars(Data);

63
fuzz/fuzz_minifyimpl.cpp Normal file
View File

@ -0,0 +1,63 @@
/*
* Minifies using the minify() function directly, without parsing.
*
* For fuzzing all of the implementations (haswell/fallback/westmere),
* finding any difference between the output of each which would
* indicate inconsistency. Also, it gets the non-default backend
* some fuzzing love.
*
* Copyright Paul Dreik 20200912 for the simdjson project.
*/
#include "simdjson.h"
#include <cstddef>
#include <cstdlib>
#include <vector>
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
using Buffer=std::vector<uint8_t>;
auto minify=[Data,Size](const simdjson::implementation* impl) -> Buffer {
Buffer ret(Size);
std::size_t retsize=0;
auto err=impl->minify(Data,Size,ret.data(),retsize);
if(err) {
std::string tmp = error_message(err);
ret.assign(tmp.begin(),tmp.end());
} else {
assert(retsize<=Size && "size should not grow by minimize()!");
ret.resize(retsize);
}
return ret;
};
auto first=simdjson::available_implementations.begin();
auto last=simdjson::available_implementations.end();
//make sure there is an implementation
assert(first!=last);
const auto reference=minify(*first);
bool failed=false;
for(auto it=first+1;it!=last; ++it) {
const auto current=minify(*it);
if(current!=reference) {
failed=true;
}
}
if(failed) {
std::cerr<<std::boolalpha<<"Mismatch between implementations of minify() found:\n";
for(auto it=first;it!=last; ++it) {
const auto current=minify(*it);
std::string tmp(current.begin(),current.end());
std::cerr<<(*it)->name()<<" returns "<<tmp<<std::endl;
}
std::abort();
}
//all is well
return 0;
}

48
fuzz/fuzz_utf8.cpp Normal file
View File

@ -0,0 +1,48 @@
/*
* For fuzzing all of the implementations (haswell/fallback/westmere),
* finding any difference between the output of each which would
* indicate inconsistency. Also, it gets the non-default backend
* some fuzzing love.
*
* Copyright Paul Dreik 20200912 for the simdjson project.
*/
#include "simdjson.h"
#include <cstddef>
#include <cstdlib>
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
auto utf8verify=[Data,Size](const simdjson::implementation* impl) -> bool {
return impl->validate_utf8((const char*)Data,Size);
};
auto first=simdjson::available_implementations.begin();
auto last=simdjson::available_implementations.end();
//make sure there is an implementation
assert(first!=last);
const bool reference=utf8verify(*first);
bool failed=false;
for(auto it=first+1;it!=last; ++it) {
const bool current=utf8verify(*it);
if(current!=reference) {
failed=true;
}
}
if(failed) {
std::cerr<<std::boolalpha<<"Mismatch between implementations of validate_utf8() found:\n";
for(auto it=first;it!=last; ++it) {
const bool current=utf8verify(*it);
std::cerr<<(*it)->name()<<" returns "<<current<<std::endl;
}
std::abort();
}
//all is well
return 0;
}

View File

@ -7,9 +7,7 @@
# invoke it from the git root.
# make sure to exit on problems
set -e
set -u
set -x
set -eux
for prog in zip cmake ninja; do
if ! which $prog >/dev/null; then
@ -32,13 +30,17 @@ cmake .. \
-DENABLE_FUZZING=On \
-DSIMDJSON_COMPETITION=Off \
-DSIMDJSON_FUZZ_LINKMAIN=Off \
-DSIMDJSON_GIT=Off \
-DSIMDJSON_GOOGLE_BENCHMARKS=Off \
-DSIMDJSON_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE
cmake --build . --target all_fuzzers
cp fuzz/fuzz_* $OUT
# all corpora are equal, they all take json as input
# all fuzzers but one (the tiny target for utf8 validation) takes json
# as input, therefore use the same corpus of json files for all.
for f in $(ls $OUT/fuzz* |grep -v '.zip$') ; do
cp ../corpus.zip $OUT/$(basename $f).zip
done