fuzz at_pointer

This adds a fuzzer for at_pointer(), which recently had a bug.

The #1142 bug had been found with this fuzzer

It also polishes the GitHub Actions job:

    cross pollinate the fuzzer corpora (lets fuzzers reuse results from other fuzzers)
    use github action syntax instead of bash checks
    only run on push if on master
This commit is contained in:
Paul Dreik 2020-09-16 21:17:43 +02:00 committed by GitHub
parent 7fc07e2d5e
commit 30b912fc81
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 132 additions and 26 deletions

View File

@ -1,11 +1,9 @@
name: Run fuzzers on stored corpus and test it with valgrind
name: Fuzz and run valgrind
# In the case of a pull request happening at the same time as a cron
# job, there is a risk two jobs run at the same time. Therefore,
# the corpus is only uploaded for the master branch. Pull requests will
# fuzz for a short while, but the results are not uploaded.
on:
push:
branches:
- master
pull_request:
schedule:
- cron: 23 */8 * * *
@ -14,10 +12,11 @@ jobs:
build:
runs-on: ubuntu-latest
env:
allfuzzers: parser dump dump_raw_tape print_json implementations
allfuzzers: atpointer dump dump_raw_tape parser print_json implementations
implementations: haswell westmere fallback
UBSAN_OPTIONS: halt_on_error=1
MAXLEN: -max_len=4000
steps:
- name: Install packages necessary for building
run: |
@ -28,82 +27,105 @@ jobs:
sudo ./llvm.sh 9
- uses: actions/checkout@v1
- name: Create and prepare the initial seed corpus
run: |
fuzz/build_corpus.sh
mv corpus.zip seed_corpus.zip
mkdir seedcorpus
unzip -q -d seedcorpus seed_corpus.zip
- name: Download the corpus from the last run
run: |
wget --quiet https://dl.bintray.com/pauldreik/simdjson-fuzz-corpus/corpus/corpus.tar
tar xf corpus.tar
rm corpus.tar
- name: List clang versions
run: |
ls /usr/bin/clang*
which clang++
clang++ --version
- name: Build all the variants
run: fuzz/build_fuzzer_variants.sh
- name: Run the fast fuzzer (release build, default implementation, to explore fast)
run: |
set -eux
for fuzzer in $allfuzzers; do
mkdir -p out/$fuzzer # in case this is a new fuzzer, or corpus.tar is broken
build-fast/fuzz/fuzz_$fuzzer out/$fuzzer seedcorpus -max_total_time=30 $MAXLEN
# get input from everyone else (corpus cross pollination)
others=$(find out -type d -not -name $fuzzer -not -name out -not -name cmin)
build-fast/fuzz/fuzz_$fuzzer out/$fuzzer $others seedcorpus -max_total_time=30 $MAXLEN
done
- name: Run the slow fuzzer (sanitizer+asserts, good at detecting errors)
run: |
set -eux
for fuzzer in $allfuzzers; do
# get input from everyone else (corpus cross pollination)
others=$(find out -type d -not -name $fuzzer -not -name out -not -name cmin)
for implementation in $implementations; do
export SIMDJSON_FORCE_IMPLEMENTATION=$implementation
build-sanitizers/fuzz/fuzz_$fuzzer out/$fuzzer seedcorpus -max_total_time=20 $MAXLEN
build-sanitizers/fuzz/fuzz_$fuzzer out/$fuzzer $others seedcorpus -max_total_time=20 $MAXLEN
done
echo now have $(ls out/$fuzzer |wc -l) files in corpus
done
- name: Minimize the corpus with the fast fuzzer on the default implementation
run: |
set -eux
for fuzzer in $allfuzzers; do
mkdir -p out/cmin/$fuzzer
build-fast/fuzz/fuzz_$fuzzer -merge=1 $MAXLEN out/cmin/$fuzzer out/$fuzzer seedcorpus
# get input from everyone else (corpus cross pollination)
others=$(find out -type d -not -name $fuzzer -not -name out -not -name cmin)
build-fast/fuzz/fuzz_$fuzzer -merge=1 $MAXLEN out/cmin/$fuzzer out/$fuzzer $others seedcorpus
rm -rf out/$fuzzer
mv out/cmin/$fuzzer out/$fuzzer
done
- name: Package the corpus into an artifact
run: |
for fuzzer in $allfuzzers; do
tar rf corpus.tar out/$fuzzer
done
- name: Save the corpus as a github artifact
uses: actions/upload-artifact@v1
with:
name: corpus
path: corpus.tar
- name: Run the minimized corpus through valgrind (replay build, default implementation)
# This takes a subset of the minimized corpus and runs it through valgrind. It is slow,
# therefore it takes a "random" subset. The random selection is accomplished by sorting on filenames,
# which are hashes of the content.
- name: Run some of the minimized corpus through valgrind (replay build, default implementation)
run: |
for fuzzer in $allfuzzers; do
find out/$fuzzer -type f |sort|xargs -n40 valgrind build-replay/fuzz/fuzz_$fuzzer 2>&1|tee valgrind-$fuzzer.txt
find out/$fuzzer -type f |sort|head -n200|xargs -n40 valgrind build-replay/fuzz/fuzz_$fuzzer 2>&1|tee valgrind-$fuzzer.txt
done
- name: Compress the valgrind output
run: tar cf valgrind.tar valgrind-*.txt
- name: Save valgrind output as a github artifact
uses: actions/upload-artifact@v1
uses: actions/upload-artifact@v2
if: always()
with:
name: valgrindresults
path: valgrind.tar
if-no-files-found: ignore
- name: Upload the corpus and results to bintray if we are on master
if: ${{ github.event_name == 'schedule' }}
run: |
if [ $(git rev-parse --verify HEAD) = $(git rev-parse --verify origin/master) ] ; then
echo uploading each artifact twice, otherwise it will not be published
curl -T corpus.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/corpus.tar";publish=1;override=1"
curl -T corpus.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/corpus.tar";publish=1;override=1"
curl -T valgrind.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/valgrind.tar";publish=1;override=1"
curl -T valgrind.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/valgrind.tar";publish=1;override=1"
else
echo "not on master, won't upload to bintray"
fi
echo uploading each artifact twice, otherwise it will not be published
curl -T corpus.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/corpus.tar";publish=1;override=1"
curl -T corpus.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/corpus.tar";publish=1;override=1"
curl -T valgrind.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/valgrind.tar";publish=1;override=1"
curl -T valgrind.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/valgrind.tar";publish=1;override=1"
- name: Archive any crashes as an artifact
uses: actions/upload-artifact@v2
if: always()

View File

@ -54,6 +54,7 @@ if(ENABLE_FUZZING)
set_property(TEST ${name} APPEND PROPERTY LABELS fuzz)
endfunction()
implement_fuzzer(fuzz_atpointer)
implement_fuzzer(fuzz_dump)
implement_fuzzer(fuzz_dump_raw_tape)
implement_fuzzer(fuzz_implementations)

17
fuzz/FuzzUtils.h Normal file
View File

@ -0,0 +1,17 @@
#ifndef SIMDJSON_FUZZUTILS_H
#define SIMDJSON_FUZZUTILS_H
#include <cstdint>
/// Views the object(s) behind `data` as read-only bytes.
/// The returned pointer holds the same address as `data`; only the pointee
/// type changes, so no data is copied.
template <typename T>
inline const std::uint8_t* as_bytes(const T* data) {
  const void* const untyped = data;
  return static_cast<const std::uint8_t*>(untyped);
}
/// Views the object(s) behind `data` as read-only chars.
/// The returned pointer holds the same address as `data`; only the pointee
/// type changes, so no data is copied.
template <typename T>
inline const char* as_chars(const T* data) {
  const void* const untyped = data;
  return static_cast<const char*>(untyped);
}
#endif // SIMDJSON_FUZZUTILS_H

68
fuzz/fuzz_atpointer.cpp Normal file
View File

@ -0,0 +1,68 @@
#include "simdjson.h"
#include "FuzzUtils.h"
#include <cstddef>
#include <cstdint>
#include <string>
#include <string_view>
/// The two parts one fuzz input is divided into. Both members are non-owning
/// views, so they are only usable while the underlying buffer is alive.
struct FuzzData {
  std::string_view json_pointer;  ///< text before the separator (empty if none)
  std::string_view json_doc;      ///< text after the separator, or the whole input
};

/**
 * @brief Splits raw fuzz input into a JSON pointer and a JSON document.
 *
 * The input is cut at the first occurrence of the separator "\n~~~\n".
 * Everything before it becomes the pointer, everything after it becomes the
 * document, and the separator itself is dropped. Without a separator the
 * pointer is empty and the whole input becomes the document.
 *
 * @param Data start of the input buffer
 * @param Size number of bytes available at Data
 * @return views into the buffer at Data (nothing is copied)
 */
FuzzData split(const char *Data, size_t Size) {
  using namespace std::literals;
  constexpr auto separator = "\n~~~\n"sv;

  const std::string_view input(Data, Size);
  const auto cut = input.find(separator);

  // No separator: the whole input is treated as the document.
  if (cut == std::string_view::npos) {
    return FuzzData{std::string_view{}, input};
  }

  const auto pointer_part = input.substr(0, cut);
  const auto document_part = input.substr(cut + separator.size());
  return FuzzData{pointer_part, document_part};
}
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
// Split data into two strings, json pointer and the document string.
// Might end up with none, either or both being empty, important for
// covering edge cases such as https://github.com/simdjson/simdjson/issues/1142
// Inputs missing the separator line will get an empty json pointer
// but the all the input put in the document string. This means
// test data from other fuzzers that take json input works for this fuzzer
// as well.
const auto fd=split(as_chars(Data),Size);
simdjson::dom::parser parser;
// parse without exceptions, for speed
auto res=parser.parse(fd.json_doc.data(),fd.json_doc.size());
if(res.error())
return 0;
simdjson::dom::element root;
if(res.get(root))
return 0;
auto maybe_leaf=root.at_pointer(fd.json_pointer);
if(maybe_leaf.error())
return 0;
simdjson::dom::element leaf;
if(maybe_leaf.get(leaf))
return 0;
std::string_view sv;
if(leaf.get_string().get(sv))
return 0;
return 0;
}

View File

@ -1,10 +1,11 @@
#include "simdjson.h"
#include "FuzzUtils.h"
#include <cstddef>
#include <cstdint>
#include <string>
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
auto begin = (const char *)Data;
auto begin = as_chars(Data);
auto end = begin + Size;
std::string str(begin, end);

View File

@ -2,11 +2,8 @@
#include <fstream>
#include <sstream>
#include <vector>
#include "FuzzUtils.h"
/// Views the object(s) behind `data` as read-only bytes.
/// The returned pointer holds the same address as `data`; only the pointee
/// type changes, so no data is copied.
template <typename T>
inline const std::uint8_t* as_bytes(const T* data) {
  const void* const untyped = data;
  return static_cast<const std::uint8_t*>(untyped);
}
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, std::size_t Size);