add multi implementation fuzzer (#1162)
This adds a fuzzer which parses the same input using all the available implementations (haswell, westmere, fallback on x64).
This should get the otherwise uncovered sourcefiles (mostly fallback) to show up in the fuzz coverage.
For instance, the fallback directory has only one line covered.
As of the 20200909 report, 1866 lines are covered out of 4478.
Also, it will detect if the implementations behave differently:
by making sure they all succeed, or all error
turning the parsed data into text again, should produce equal results
While at it, I corrected some minor things:
clean up building too many variants, run with forced implementation (closes #815 )
always store crashes as artefacts, good in case the fuzzer finds something
return value of the fuzzer function should always be 0
reduce log spam
introduce max size for the seed corpus and the CI fuzzer
2020-09-12 05:46:22 +08:00
|
|
|
/*
|
|
|
|
* For fuzzing all of the implementations (haswell/fallback/westmere),
|
|
|
|
* finding any difference between the output of each which would
|
|
|
|
* indicate inconsistency. Also, it gets the non-default backend
|
|
|
|
* some fuzzing love.
|
|
|
|
*
|
|
|
|
* Copyright Paul Dreik 20200909 for the simdjson project.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "simdjson.h"
|
|
|
|
#include <cstddef>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <cstdlib>
|
|
|
|
#include <string>
|
|
|
|
#include <array>
|
2020-10-09 11:29:54 +08:00
|
|
|
#include "supported_implementations.h"
|
add multi implementation fuzzer (#1162)
This adds a fuzzer which parses the same input using all the available implementations (haswell, westmere, fallback on x64).
This should get the otherwise uncovered sourcefiles (mostly fallback) to show up in the fuzz coverage.
For instance, the fallback directory has only one line covered.
As of the 20200909 report, 1866 lines are covered out of 4478.
Also, it will detect if the implementations behave differently:
by making sure they all succeed, or all error
turning the parsed data into text again, should produce equal results
While at it, I corrected some minor things:
clean up building too many variants, run with forced implementation (closes #815 )
always store crashes as artefacts, good in case the fuzzer finds something
return value of the fuzzer function should always be 0
reduce log spam
introduce max size for the seed corpus and the CI fuzzer
2020-09-12 05:46:22 +08:00
|
|
|
|
|
|
|
|
|
|
|
// store each implementation along with its intermediate results,
|
|
|
|
// which would make things easier to debug in case this fuzzer ever
|
|
|
|
// catches anything
|
|
|
|
struct Impl {
|
|
|
|
explicit Impl(const simdjson::implementation* im=nullptr) : impl(im),parser(),element(),error(),output(){}
|
|
|
|
//silence -Weffc++
|
|
|
|
Impl(const Impl&)=delete;
|
|
|
|
Impl& operator=(const Impl&)=delete;
|
|
|
|
|
|
|
|
const simdjson::implementation* impl;
|
|
|
|
simdjson::dom::parser parser;
|
|
|
|
simdjson::dom::element element;
|
|
|
|
simdjson::error_code error;
|
|
|
|
std::string output;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Reports, for every record in [first, last), the backend name and the
/// error code it produced, one line per record on stderr. Afterwards the
/// stream is flushed and the process is aborted; this function does not
/// return.
///
/// @tparam Iterator iterator whose value exposes `impl` (with `name()`)
///                  and `error`
template<class Iterator>
void showErrorAndAbort(Iterator first, Iterator last) {
  for(Iterator cur=first; cur!=last; ++cur) {
    std::cerr<<"Implementation: "<<cur->impl->name()<<"\tError:"<<cur->error<<'\n';
  }
  // make sure the report is visible before the process goes down
  std::cerr.flush();
  std::abort();
}
|
|
|
|
|
|
|
|
/// Dumps the text output of every record in [first, last) to stderr,
/// followed by a line for each distinct pair of records saying whether
/// their outputs are equal. Afterwards the stream is flushed and the
/// process is aborted; this function does not return.
///
/// @tparam Iterator multi-pass iterator whose value exposes `impl` (with
///                  `name()`) and `output`
template<class Iterator>
void showOutputAndAbort(Iterator first, Iterator last) {
  // first the raw output of each backend
  for(Iterator cur=first; cur!=last; ++cur) {
    std::cerr<<"Implementation: "<<cur->impl->name()<<"\tOutput: "<<cur->output<<'\n';
  }

  // show the pairwise results: each unordered pair is visited once, with
  // the right-hand side starting one past the left-hand side
  for(Iterator lhs=first; lhs!=last; ++lhs) {
    Iterator rhs=lhs;
    for(++rhs; rhs!=last; ++rhs) {
      const bool same=(lhs->output==rhs->output);
      const char* verdict=same ? " match." : " do NOT match.";
      std::cerr<<"Implementation "<<lhs->impl->name()<<" and "<<rhs->impl->name()<<verdict<<'\n';
    }
  }

  // make sure the report is visible before the process goes down
  std::cerr.flush();
  std::abort();
}
|
|
|
|
|
|
|
|
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
|
|
|
|
2020-10-09 11:29:54 +08:00
|
|
|
// since this check is expensive, only do it once
|
|
|
|
static const auto supported_implementations=get_runtime_supported_implementations();
|
|
|
|
|
|
|
|
|
add multi implementation fuzzer (#1162)
This adds a fuzzer which parses the same input using all the available implementations (haswell, westmere, fallback on x64).
This should get the otherwise uncovered sourcefiles (mostly fallback) to show up in the fuzz coverage.
For instance, the fallback directory has only one line covered.
As of the 20200909 report, 1866 lines are covered out of 4478.
Also, it will detect if the implementations behave differently:
by making sure they all succeed, or all error
turning the parsed data into text again, should produce equal results
While at it, I corrected some minor things:
clean up building too many variants, run with forced implementation (closes #815 )
always store crashes as artefacts, good in case the fuzzer finds something
return value of the fuzzer function should always be 0
reduce log spam
introduce max size for the seed corpus and the CI fuzzer
2020-09-12 05:46:22 +08:00
|
|
|
// make this dynamic, so it works regardless of how it was compiled
|
|
|
|
// or what hardware it runs on
|
|
|
|
constexpr std::size_t Nimplementations_max=3;
|
2020-10-09 11:29:54 +08:00
|
|
|
const std::size_t Nimplementations = supported_implementations.size();
|
2020-10-02 23:04:51 +08:00
|
|
|
|
add multi implementation fuzzer (#1162)
This adds a fuzzer which parses the same input using all the available implementations (haswell, westmere, fallback on x64).
This should get the otherwise uncovered sourcefiles (mostly fallback) to show up in the fuzz coverage.
For instance, the fallback directory has only one line covered.
As of the 20200909 report, 1866 lines are covered out of 4478.
Also, it will detect if the implementations behave differently:
by making sure they all succeed, or all error
turning the parsed data into text again, should produce equal results
While at it, I corrected some minor things:
clean up building too many variants, run with forced implementation (closes #815 )
always store crashes as artefacts, good in case the fuzzer finds something
return value of the fuzzer function should always be 0
reduce log spam
introduce max size for the seed corpus and the CI fuzzer
2020-09-12 05:46:22 +08:00
|
|
|
if(Nimplementations>Nimplementations_max) {
|
|
|
|
//there is another backend added, please bump Nimplementations_max!
|
|
|
|
std::abort();
|
|
|
|
}
|
|
|
|
|
|
|
|
// get pointers to the backend implementation
|
|
|
|
std::array<Impl,Nimplementations_max> implementations;
|
|
|
|
{
|
|
|
|
std::size_t i=0;
|
2020-10-09 11:29:54 +08:00
|
|
|
for(auto& e: supported_implementations) {
|
2020-10-02 23:04:51 +08:00
|
|
|
implementations[i++].impl=e;
|
add multi implementation fuzzer (#1162)
This adds a fuzzer which parses the same input using all the available implementations (haswell, westmere, fallback on x64).
This should get the otherwise uncovered sourcefiles (mostly fallback) to show up in the fuzz coverage.
For instance, the fallback directory has only one line covered.
As of the 20200909 report, 1866 lines are covered out of 4478.
Also, it will detect if the implementations behave differently:
by making sure they all succeed, or all error
turning the parsed data into text again, should produce equal results
While at it, I corrected some minor things:
clean up building too many variants, run with forced implementation (closes #815 )
always store crashes as artefacts, good in case the fuzzer finds something
return value of the fuzzer function should always be 0
reduce log spam
introduce max size for the seed corpus and the CI fuzzer
2020-09-12 05:46:22 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// let each implementation parse and store the result
|
|
|
|
std::size_t nerrors=0;
|
2020-10-01 16:12:37 +08:00
|
|
|
for(std::size_t i=0; i<Nimplementations; ++i) {
|
|
|
|
auto& e=implementations[i];
|
add multi implementation fuzzer (#1162)
This adds a fuzzer which parses the same input using all the available implementations (haswell, westmere, fallback on x64).
This should get the otherwise uncovered sourcefiles (mostly fallback) to show up in the fuzz coverage.
For instance, the fallback directory has only one line covered.
As of the 20200909 report, 1866 lines are covered out of 4478.
Also, it will detect if the implementations behave differently:
by making sure they all succeed, or all error
turning the parsed data into text again, should produce equal results
While at it, I corrected some minor things:
clean up building too many variants, run with forced implementation (closes #815 )
always store crashes as artefacts, good in case the fuzzer finds something
return value of the fuzzer function should always be 0
reduce log spam
introduce max size for the seed corpus and the CI fuzzer
2020-09-12 05:46:22 +08:00
|
|
|
simdjson::active_implementation=e.impl;
|
|
|
|
e.error=e.parser.parse(Data,Size).get(e.element);
|
|
|
|
if(e.error) {
|
|
|
|
++nerrors;
|
|
|
|
} else {
|
|
|
|
std::ostringstream oss;
|
|
|
|
oss<<e.element;
|
|
|
|
e.output=oss.str();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//we should either have no errors, or all should error
|
|
|
|
if(nerrors!=0) {
|
|
|
|
if(nerrors!=Nimplementations) {
|
|
|
|
showErrorAndAbort(implementations.begin(),
|
|
|
|
implementations.begin()+Nimplementations);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
//parsing went well for all. compare the output against the first.
|
|
|
|
const std::string& reference=implementations[0].output;
|
|
|
|
for(std::size_t i=1; i<Nimplementations; ++i) {
|
|
|
|
if(implementations[i].output!=reference) {
|
|
|
|
showOutputAndAbort(implementations.begin(),
|
|
|
|
implementations.begin()+Nimplementations);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//all is well
|
|
|
|
return 0;
|
|
|
|
}
|