fuzz at_pointer
This adds a fuzzer for at_pointer(), which recently had a bug; bug #1142 was found with this fuzzer. It also polishes the GitHub Actions job: cross-pollinate the fuzzer corpora (lets fuzzers reuse results from other fuzzers); use GitHub Actions syntax instead of bash checks; only run on push if on master.
This commit is contained in:
parent
7fc07e2d5e
commit
30b912fc81
|
@ -1,11 +1,9 @@
|
|||
name: Run fuzzers on stored corpus and test it with valgrind
|
||||
name: Fuzz and run valgrind
|
||||
|
||||
# In the case of a pull request happening at the same time as a cron
|
||||
# job, there is a risk two jobs run at the same time. Therefore,
|
||||
# the corpus is only uploaded for the master branch. Pull requests will
|
||||
# fuzz for a short while, but the results are not uploaded.
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
pull_request:
|
||||
schedule:
|
||||
- cron: 23 */8 * * *
|
||||
|
@ -14,10 +12,11 @@ jobs:
|
|||
build:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
allfuzzers: parser dump dump_raw_tape print_json implementations
|
||||
allfuzzers: atpointer dump dump_raw_tape parser print_json implementations
|
||||
implementations: haswell westmere fallback
|
||||
UBSAN_OPTIONS: halt_on_error=1
|
||||
MAXLEN: -max_len=4000
|
||||
|
||||
steps:
|
||||
- name: Install packages necessary for building
|
||||
run: |
|
||||
|
@ -28,82 +27,105 @@ jobs:
|
|||
sudo ./llvm.sh 9
|
||||
|
||||
- uses: actions/checkout@v1
|
||||
|
||||
- name: Create and prepare the initial seed corpus
|
||||
run: |
|
||||
fuzz/build_corpus.sh
|
||||
mv corpus.zip seed_corpus.zip
|
||||
mkdir seedcorpus
|
||||
unzip -q -d seedcorpus seed_corpus.zip
|
||||
|
||||
- name: Download the corpus from the last run
|
||||
run: |
|
||||
wget --quiet https://dl.bintray.com/pauldreik/simdjson-fuzz-corpus/corpus/corpus.tar
|
||||
tar xf corpus.tar
|
||||
rm corpus.tar
|
||||
|
||||
- name: List clang versions
|
||||
run: |
|
||||
ls /usr/bin/clang*
|
||||
which clang++
|
||||
clang++ --version
|
||||
|
||||
- name: Build all the variants
|
||||
run: fuzz/build_fuzzer_variants.sh
|
||||
|
||||
- name: Run the fast fuzzer (release build, default implementation, to explore fast)
|
||||
run: |
|
||||
set -eux
|
||||
for fuzzer in $allfuzzers; do
|
||||
mkdir -p out/$fuzzer # in case this is a new fuzzer, or corpus.tar is broken
|
||||
build-fast/fuzz/fuzz_$fuzzer out/$fuzzer seedcorpus -max_total_time=30 $MAXLEN
|
||||
# get input from everyone else (corpus cross pollination)
|
||||
others=$(find out -type d -not -name $fuzzer -not -name out -not -name cmin)
|
||||
build-fast/fuzz/fuzz_$fuzzer out/$fuzzer $others seedcorpus -max_total_time=30 $MAXLEN
|
||||
done
|
||||
|
||||
- name: Run the slow fuzzer (sanitizer+asserts, good at detecting errors)
|
||||
run: |
|
||||
set -eux
|
||||
for fuzzer in $allfuzzers; do
|
||||
# get input from everyone else (corpus cross pollination)
|
||||
others=$(find out -type d -not -name $fuzzer -not -name out -not -name cmin)
|
||||
for implementation in $implementations; do
|
||||
export SIMDJSON_FORCE_IMPLEMENTATION=$implementation
|
||||
build-sanitizers/fuzz/fuzz_$fuzzer out/$fuzzer seedcorpus -max_total_time=20 $MAXLEN
|
||||
build-sanitizers/fuzz/fuzz_$fuzzer out/$fuzzer $others seedcorpus -max_total_time=20 $MAXLEN
|
||||
done
|
||||
echo now have $(ls out/$fuzzer |wc -l) files in corpus
|
||||
done
|
||||
|
||||
- name: Minimize the corpus with the fast fuzzer on the default implementation
|
||||
run: |
|
||||
set -eux
|
||||
for fuzzer in $allfuzzers; do
|
||||
mkdir -p out/cmin/$fuzzer
|
||||
build-fast/fuzz/fuzz_$fuzzer -merge=1 $MAXLEN out/cmin/$fuzzer out/$fuzzer seedcorpus
|
||||
# get input from everyone else (corpus cross pollination)
|
||||
others=$(find out -type d -not -name $fuzzer -not -name out -not -name cmin)
|
||||
build-fast/fuzz/fuzz_$fuzzer -merge=1 $MAXLEN out/cmin/$fuzzer out/$fuzzer $others seedcorpus
|
||||
rm -rf out/$fuzzer
|
||||
mv out/cmin/$fuzzer out/$fuzzer
|
||||
done
|
||||
|
||||
- name: Package the corpus into an artifact
|
||||
run: |
|
||||
for fuzzer in $allfuzzers; do
|
||||
tar rf corpus.tar out/$fuzzer
|
||||
done
|
||||
|
||||
- name: Save the corpus as a github artifact
|
||||
uses: actions/upload-artifact@v1
|
||||
with:
|
||||
name: corpus
|
||||
path: corpus.tar
|
||||
- name: Run the minimized corpus through valgrind (replay build, default implementation)
|
||||
|
||||
# This takes a subset of the minimized corpus and runs it through valgrind. It is slow,
|
||||
# therefore take a "random" subset. The random selection is accomplished by sorting on filenames,
|
||||
# which are hashes of the content.
|
||||
- name: Run some of the minimized corpus through valgrind (replay build, default implementation)
|
||||
run: |
|
||||
for fuzzer in $allfuzzers; do
|
||||
find out/$fuzzer -type f |sort|xargs -n40 valgrind build-replay/fuzz/fuzz_$fuzzer 2>&1|tee valgrind-$fuzzer.txt
|
||||
find out/$fuzzer -type f |sort|head -n200|xargs -n40 valgrind build-replay/fuzz/fuzz_$fuzzer 2>&1|tee valgrind-$fuzzer.txt
|
||||
done
|
||||
|
||||
- name: Compress the valgrind output
|
||||
run: tar cf valgrind.tar valgrind-*.txt
|
||||
|
||||
- name: Save valgrind output as a github artifact
|
||||
uses: actions/upload-artifact@v1
|
||||
uses: actions/upload-artifact@v2
|
||||
if: always()
|
||||
with:
|
||||
name: valgrindresults
|
||||
path: valgrind.tar
|
||||
if-no-files-found: ignore
|
||||
|
||||
- name: Upload the corpus and results to bintray if we are on master
|
||||
if: ${{ github.event_name == 'schedule' }}
|
||||
run: |
|
||||
if [ $(git rev-parse --verify HEAD) = $(git rev-parse --verify origin/master) ] ; then
|
||||
echo uploading each artifact twice, otherwise it will not be published
|
||||
curl -T corpus.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/corpus.tar";publish=1;override=1"
|
||||
curl -T corpus.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/corpus.tar";publish=1;override=1"
|
||||
curl -T valgrind.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/valgrind.tar";publish=1;override=1"
|
||||
curl -T valgrind.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/valgrind.tar";publish=1;override=1"
|
||||
else
|
||||
echo "not on master, won't upload to bintray"
|
||||
fi
|
||||
echo uploading each artifact twice, otherwise it will not be published
|
||||
curl -T corpus.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/corpus.tar";publish=1;override=1"
|
||||
curl -T corpus.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/corpus.tar";publish=1;override=1"
|
||||
curl -T valgrind.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/valgrind.tar";publish=1;override=1"
|
||||
curl -T valgrind.tar -upauldreik:${{ secrets.bintrayApiKey }} https://api.bintray.com/content/pauldreik/simdjson-fuzz-corpus/corpus/0/corpus/valgrind.tar";publish=1;override=1"
|
||||
|
||||
- name: Archive any crashes as an artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
if: always()
|
||||
|
|
|
@ -54,6 +54,7 @@ if(ENABLE_FUZZING)
|
|||
set_property(TEST ${name} APPEND PROPERTY LABELS fuzz)
|
||||
endfunction()
|
||||
|
||||
implement_fuzzer(fuzz_atpointer)
|
||||
implement_fuzzer(fuzz_dump)
|
||||
implement_fuzzer(fuzz_dump_raw_tape)
|
||||
implement_fuzzer(fuzz_implementations)
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
#ifndef SIMDJSON_FUZZUTILS_H
|
||||
#define SIMDJSON_FUZZUTILS_H
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// view data as a byte pointer
|
||||
/// Reinterprets a pointer to any object type as a pointer to read-only bytes.
/// @param data pointer to view; it is not dereferenced here
/// @return the same address, typed as const std::uint8_t*
template <typename T> inline const std::uint8_t* as_bytes(const T* data) {
  // Go through const void* so that only static_cast is needed.
  const void* const untyped = data;
  return static_cast<const std::uint8_t*>(untyped);
}
|
||||
|
||||
// view data as a char pointer
|
||||
/// Reinterprets a pointer to any object type as a pointer to read-only chars.
/// @param data pointer to view; it is not dereferenced here
/// @return the same address, typed as const char*
template <typename T> inline const char* as_chars(const T* data) {
  // Go through const void* so that only static_cast is needed.
  const void* const untyped = data;
  return static_cast<const char*>(untyped);
}
|
||||
|
||||
|
||||
#endif // SIMDJSON_FUZZUTILS_H
|
|
@ -0,0 +1,68 @@
|
|||
#include "simdjson.h"
|
||||
#include "FuzzUtils.h"
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
/// The two pieces a fuzz input is cut into. Both members are views into the
/// buffer handed to split(); they own nothing.
struct FuzzData {
  std::string_view json_pointer; ///< text before the separator (may be empty)
  std::string_view json_doc;     ///< text after the separator, or the whole input
};

/**
 * @brief Splits fuzz data into a JSON pointer and a JSON document.
 *
 * The input is cut at the first occurrence of the separator line "\n~~~\n".
 * Without a separator, the pointer is a default-constructed (empty) view and
 * the document is the complete input.
 *
 * @param Data start of the fuzz input
 * @param Size number of bytes available at Data
 * @return the pointer/document pair, viewing the memory at Data
 */
FuzzData split(const char *Data, size_t Size) {
  using namespace std::literals;
  constexpr auto separator = "\n~~~\n"sv;

  const std::string_view input(Data, Size);
  const auto cut = input.find(separator);

  if (cut != std::string_view::npos) {
    // Everything before the separator is the pointer, everything after it
    // is the document; the separator itself is dropped.
    const std::string_view pointer_part = input.substr(0, cut);
    const std::string_view document_part = input.substr(cut + separator.size());
    return FuzzData{pointer_part, document_part};
  }

  // No separator line: treat the input as a bare document.
  return FuzzData{std::string_view{}, input};
}
|
||||
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
||||
|
||||
// Split data into two strings, json pointer and the document string.
|
||||
// Might end up with none, either or both being empty, important for
|
||||
// covering edge cases such as https://github.com/simdjson/simdjson/issues/1142
|
||||
// Inputs missing the separator line will get an empty json pointer
|
||||
// but the all the input put in the document string. This means
|
||||
// test data from other fuzzers that take json input works for this fuzzer
|
||||
// as well.
|
||||
const auto fd=split(as_chars(Data),Size);
|
||||
|
||||
simdjson::dom::parser parser;
|
||||
|
||||
// parse without exceptions, for speed
|
||||
auto res=parser.parse(fd.json_doc.data(),fd.json_doc.size());
|
||||
if(res.error())
|
||||
return 0;
|
||||
|
||||
simdjson::dom::element root;
|
||||
if(res.get(root))
|
||||
return 0;
|
||||
|
||||
auto maybe_leaf=root.at_pointer(fd.json_pointer);
|
||||
if(maybe_leaf.error())
|
||||
return 0;
|
||||
|
||||
simdjson::dom::element leaf;
|
||||
if(maybe_leaf.get(leaf))
|
||||
return 0;
|
||||
|
||||
std::string_view sv;
|
||||
if(leaf.get_string().get(sv))
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
|
@ -1,10 +1,11 @@
|
|||
#include "simdjson.h"
|
||||
#include "FuzzUtils.h"
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
||||
|
||||
auto begin = (const char *)Data;
|
||||
auto begin = as_chars(Data);
|
||||
auto end = begin + Size;
|
||||
|
||||
std::string str(begin, end);
|
||||
|
|
|
@ -2,11 +2,8 @@
|
|||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include "FuzzUtils.h"
|
||||
|
||||
// view data as a byte pointer
|
||||
/// Reinterprets a pointer to any object type as a pointer to read-only bytes.
/// @param data pointer to view; it is not dereferenced here
/// @return the same address, typed as const std::uint8_t*
template <typename T> inline const std::uint8_t* as_bytes(const T* data) {
  // Go through const void* so that only static_cast is needed.
  const void* const untyped = data;
  return static_cast<const std::uint8_t*>(untyped);
}
|
||||
|
||||
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, std::size_t Size);
|
||||
|
|
Loading…
Reference in New Issue