add ossfuzz support (#362)

* initial oss-fuzz friendly build

parts taken from libfmt, which I wrote and have the copyright to

* fix build error

* add script for building a corpus zip

see https://google.github.io/oss-fuzz/getting-started/new-project-guide/#seed-corpus

* fix zip command

* drop setting the C++ standard

* disable the minify fuzzer, does not pass oss-fuzz check-build test

* fix integer overflow in subnormal_power10

detected by oss-fuzz

https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=18714

* invoke the build like oss fuzz does

* document what the scripts are for and how to use them

* add a page about fuzzing
This commit is contained in:
Paul Dreik 2019-11-08 16:32:43 +01:00 committed by Daniel Lemire
parent c4f1baad31
commit 8ae818e17c
8 changed files with 170 additions and 14 deletions

View File

@ -49,7 +49,7 @@ add_subdirectory(tests)
add_subdirectory(benchmark)
# for fuzzing, read the comments in the fuzz/CMakeLists.txt file
option(ENABLE_FUZZING "enable fuzzing (experimental, requires clang)" OFF)
option(ENABLE_FUZZING "enable building the fuzzers" ON)
if(ENABLE_FUZZING)
add_subdirectory(fuzz)
endif()

View File

@ -8,20 +8,43 @@
# export CFLAGS="-fsanitize=fuzzer-no-link,address,undefined"
# export CXX=clang++
# export CC=clang++
# cmake .. -GNinja -DCMAKE_BUILD_TYPE=Debug -DENABLE_FUZZING=On
# cmake .. -GNinja -DCMAKE_BUILD_TYPE=Debug -DENABLE_FUZZING=On -DSIMDJSON_FUZZ_LINKMAIN=Off -DSIMDJSON_FUZZ_LDFLAGS=-fsanitize=fuzzer
# ninja
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_executable(fuzz_parser fuzz_parser.cpp)
target_link_libraries(fuzz_parser PRIVATE ${SIMDJSON_LIB_NAME} )
target_link_libraries(fuzz_parser PRIVATE "-fsanitize=fuzzer")
add_executable(fuzz_minify fuzz_minify.cpp)
target_link_libraries(fuzz_minify PRIVATE ${SIMDJSON_LIB_NAME} )
target_link_libraries(fuzz_minify PRIVATE "-fsanitize=fuzzer")
# setting this links in a main. useful for reproducing,
# kcov, gdb, afl, valgrind.
# (note that libFuzzer can also reproduce, just pass it the files)
#
# Using this by default means the fuzzers will be built as a part of the normal
# workflow, meaning they won't bitrot and will participate in refactoring etc.
#
option(SIMDJSON_FUZZ_LINKMAIN "links a main into fuzz targets for building reproducers" On)
add_executable(fuzz_dump fuzz_dump.cpp)
target_link_libraries(fuzz_dump PRIVATE ${SIMDJSON_LIB_NAME} )
target_link_libraries(fuzz_dump PRIVATE "-fsanitize=fuzzer")
# For oss-fuzz - insert $LIB_FUZZING_ENGINE into the link flags, but only for
# the fuzz targets, otherwise the cmake configuration step fails.
# The value is empty by default, in which case nothing extra is linked.
set(SIMDJSON_FUZZ_LDFLAGS "" CACHE STRING "LDFLAGS for the fuzz targets")
# Source files of the fuzzers; each entry is handed to implement_fuzzer()
# and becomes one executable.
set(SOURCES
fuzz_parser.cpp
# fuzz_minify.cpp # <--- does not pass the build check test on oss-fuzz, says "partially instrumented". help needed!
fuzz_dump.cpp
)
# Defines one fuzz target from a single source file.
#
# The target is named after the source file, without directory and
# extension (fuzz_parser.cpp -> fuzz_parser).
#   - main.cpp is added to the target when SIMDJSON_FUZZ_LINKMAIN is enabled.
#   - The target links against ${SIMDJSON_LIB_NAME}.
#   - SIMDJSON_FUZZ_LDFLAGS, when set, is appended to the link line of the
#     fuzz target only.
#
# This is a function rather than a macro so that the helper variable does
# not leak into the calling directory scope.
#
# Example: implement_fuzzer(fuzz_parser.cpp)
function(implement_fuzzer sourcefile)
  get_filename_component(name "${sourcefile}" NAME_WE)
  add_executable(${name} "${sourcefile}")
  if(SIMDJSON_FUZZ_LINKMAIN)
    target_sources(${name} PRIVATE main.cpp)
  endif()
  target_link_libraries(${name} PRIVATE ${SIMDJSON_LIB_NAME})
  if(SIMDJSON_FUZZ_LDFLAGS)
    # kept as target_link_libraries: the value may be a linker flag
    # (-fsanitize=fuzzer) or, presumably, a path to a fuzzing engine library.
    target_link_libraries(${name} PRIVATE ${SIMDJSON_FUZZ_LDFLAGS})
  endif()
endfunction()
# Create one fuzz target per entry of the SOURCES list.
foreach(fuzz_source IN LISTS SOURCES)
  implement_fuzzer(${fuzz_source})
endforeach()

24
fuzz/Fuzzing.md Normal file
View File

@ -0,0 +1,24 @@
# Fuzzing
[Fuzzing](https://en.wikipedia.org/wiki/Fuzzing) is efficient for finding bugs. Here are a few bugs found by fuzzing:
- https://github.com/lemire/simdjson/issues/353
- https://github.com/lemire/simdjson/issues/351
- https://github.com/lemire/simdjson/issues/345
Simdjson is continuously fuzzed on [oss-fuzz](https://github.com/google/oss-fuzz).
## Running the fuzzers locally
Make sure you have clang and cmake installed.
The easiest way to get started is to run the following, standing in the root of the checked out repo:
```
fuzz/build_like_ossfuzz.sh
```
Then invoke a fuzzer as shown by the following example:
```
mkdir -p out/parser
build/fuzz/fuzz_parser out/parser/
```

18
fuzz/build_corpus.sh Executable file
View File

@ -0,0 +1,18 @@
#!/bin/sh
#
# Builds a corpus from all json files in the source directory.
# The files are renamed to the sha1 of their content, and suffixed
# .json. The files are zipped into a flat file named corpus.zip,
# which is written to the current working directory.
set -eu

tmp=$(mktemp -d)
# remove the scratch directory when the script exits, including when a
# command fails and set -e aborts the script
trap 'rm -rf "$tmp"' EXIT

# the repository root is the parent of the directory holding this script
root=$(readlink -f "$(dirname "$0")/..")

# IFS= and -r keep leading/trailing blanks and backslashes in file names
find "$root" -type f -name "*.json" | while IFS= read -r json; do
  digest=$(sha1sum < "$json" | cut -f1 -d' ')
  # identical content maps to the same name, so duplicates collapse
  cp "$json" "$tmp/$digest.json"
done

# --junk-paths stores the files without their directory part
zip --junk-paths -r corpus.zip "$tmp"

22
fuzz/build_like_ossfuzz.sh Executable file
View File

@ -0,0 +1,22 @@
#!/bin/sh
#
# This script emulates how oss fuzz invokes the build
# process, handy for troubleshooting cmake issues and possibly
# recreating testcases. For proper debugging of the oss fuzz
# build, follow the procedure at https://google.github.io/oss-fuzz/getting-started/new-project-guide/#testing-locally
#
# The results are placed in ./ossfuzz-out, relative to the directory
# the script is invoked from.
set -eu

# ossfuzz.sh lives next to this script; quoted so paths with spaces work
ossfuzz="$(readlink -f "$(dirname "$0")")/ossfuzz.sh"

mkdir -p ossfuzz-out

# assign first, then export: the assignment is not subject to word
# splitting, whereas "export OUT=$(pwd)/..." is in some sh implementations
OUT="$(pwd)/ossfuzz-out"
export OUT
export CC=clang
export CXX="clang++"
export CFLAGS="-fsanitize=fuzzer-no-link"
export CXXFLAGS="-fsanitize=fuzzer-no-link"
export LIB_FUZZING_ENGINE="-fsanitize=fuzzer"

"$ossfuzz"

echo "look at the results in $OUT"

26
fuzz/main.cpp Normal file
View File

@ -0,0 +1,26 @@
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
// Returns the given typed pointer reinterpreted as a pointer to bytes.
// The address is unchanged; only the pointee type differs.
template <typename T>
inline auto as_bytes(const T* data) -> const std::uint8_t* {
  const void* untyped = data;
  return static_cast<const std::uint8_t*>(untyped);
}
// Entry point implemented by each fuzz target.
extern "C" int LLVMFuzzerTestOneInput(const std::uint8_t* Data, std::size_t Size);

// Reproducer driver: feeds the content of every file named on the command
// line to LLVMFuzzerTestOneInput, one file at a time.
//
// Returns EXIT_SUCCESS when all files were processed, EXIT_FAILURE as soon
// as a file cannot be opened, measured or read completely. I/O problems are
// checked explicitly instead of with assert(), so the checks are still
// present when NDEBUG is defined.
int main(int argc, char* argv[]) {
  for (int i = 1; i < argc; ++i) {
    // binary mode: the bytes must reach the fuzz target untranslated, and
    // the size from tellg() must match what read() delivers
    std::ifstream in(argv[i], std::ios_base::binary);
    if (!in) {
      std::cerr << "could not open " << argv[i] << '\n';
      return EXIT_FAILURE;
    }

    // determine the file size by seeking to the end
    in.seekg(0, std::ios_base::end);
    const std::streamoff size = in.tellg();
    if (size < 0) {
      // tellg() reports failure as -1; casting that to size_t would
      // request an enormous buffer
      std::cerr << "could not determine the size of " << argv[i] << '\n';
      return EXIT_FAILURE;
    }
    in.seekg(0, std::ios_base::beg);

    std::vector<char> buf(static_cast<std::size_t>(size));
    in.read(buf.data(), static_cast<std::streamsize>(buf.size()));
    if (in.gcount() != static_cast<std::streamsize>(buf.size())) {
      std::cerr << "could not read all of " << argv[i] << '\n';
      return EXIT_FAILURE;
    }

    LLVMFuzzerTestOneInput(as_bytes(buf.data()), buf.size());
  }
  return EXIT_SUCCESS;
}

42
fuzz/ossfuzz.sh Executable file
View File

@ -0,0 +1,42 @@
#!/bin/sh
#
# entry point for oss-fuzz, so that fuzzers
# and build invocation can be changed without having
# to modify the oss-fuzz repo.
#
# invoke it from the git root.
#
# Reads from the environment:
#   OUT                 directory the fuzzers and their corpora are copied to
#   LIB_FUZZING_ENGINE  passed to cmake as SIMDJSON_FUZZ_LDFLAGS

# make sure to exit on problems
set -e
set -u

# command -v is the POSIX way to look up a program, unlike which
for prog in zip cmake ninja; do
  if ! command -v "$prog" >/dev/null 2>&1; then
    echo "please install $prog" >&2
    exit 1
  fi
done

# build the corpus (all inputs are json, the same corpus can be used for everyone)
fuzz/build_corpus.sh

# -p so that the script can be run again on an existing build directory
mkdir -p build
cd build

cmake .. \
  -GNinja \
  -DCMAKE_BUILD_TYPE=Debug \
  -DSIMDJSON_BUILD_STATIC=On \
  -DENABLE_FUZZING=On \
  -DSIMDJSON_FUZZ_LINKMAIN=Off \
  -DSIMDJSON_FUZZ_LDFLAGS="$LIB_FUZZING_ENGINE"

cmake --build .

cp fuzz/fuzz_* "$OUT"

# all corpora are equal, they all take json as input
for f in "$OUT"/fuzz* ; do
  # skip corpus archives left behind by an earlier run, otherwise they
  # would get a corpus of their own (fuzz_parser.zip.zip)
  case "$f" in
    *.zip) continue ;;
  esac
  cp ../corpus.zip "$OUT/$(basename "$f").zip"
done

View File

@ -3,6 +3,7 @@
#include "simdjson/portability.h"
#include <cstring>
#include <memory>
#include <string>
namespace simdjson {
// low-level function to allocate memory with padding so we can read past the