Make it possible to check that an implementation is supported at runtime (#1197)

* Make it possible to check that an implementation is supported at runtime.

* add CI fuzzing on arm 64 bit

This adds fuzzing on drone.io arm64

For some reason, leak detection had to be disabled. If it is enabled, the fuzzer falsely reports a crash at the end of fuzzing.

Closes: #1188

* Guarding the implementation accesses.

* Better doc.

* Updating cxxopts.

* Make it possible to check that an implementation is supported at runtime.

* Guarding the implementation accesses.

* Better doc.

* Updating cxxopts.

* We need to accommodate cxxopts

Co-authored-by: Paul Dreik <github@pauldreik.se>
This commit is contained in:
Daniel Lemire 2020-10-02 11:04:51 -04:00 committed by GitHub
parent e06ddea784
commit 9865bb6904
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 159 additions and 25 deletions

View File

@ -96,8 +96,14 @@ struct option_struct {
case 'v':
verbose = true;
break;
case 'a':
simdjson::active_implementation = simdjson::available_implementations[optarg];
case 'a': {
auto impl = simdjson::available_implementations[optarg];
if(impl && impl->supported_by_runtime_system()) {
simdjson::active_implementation = impl;
} else {
std::cerr << "implementation " << optarg << " not found or not supported " << std::endl;
}
}
break;
case 's':
if (!strcmp(optarg, "stage1")) {

View File

@ -125,10 +125,12 @@ int main(int argc, char *argv[]) {
size_t outlength;
uint8_t *cbuffer = (uint8_t *)buffer;
for (auto imple : simdjson::available_implementations) {
if(imple->supported_by_runtime_system()) {
BEST_TIME((std::string("simdjson->minify+")+imple->name()).c_str(), (imple->minify(cbuffer, p.size(), cbuffer, outlength) == simdjson::SUCCESS ? outlength : -1),
outlength, memcpy(buffer, p.data(), p.size()), repeat, volume,
!just_data);
}
}
printf("minisize = %zu, original size = %zu (minified down to %.2f percent "
"of original) \n",

View File

@ -67,8 +67,10 @@ void print_usage(ostream& out) {
out << "-a IMPL - Use the given parser implementation. By default, detects the most advanced" << endl;
out << " implementation supported on the host machine." << endl;
for (auto impl : simdjson::available_implementations) {
if(impl->supported_by_runtime_system()) {
out << "-a " << std::left << std::setw(9) << impl->name() << " - Use the " << impl->description() << " parser implementation." << endl;
}
}
}
void exit_usage(string message) {
@ -115,12 +117,14 @@ struct option_struct {
break;
case 'a': {
const implementation *impl = simdjson::available_implementations[optarg];
if (!impl) {
if ((!impl) || (!impl->supported_by_runtime_system())) {
std::string exit_message = string("Unsupported option value -a ") + optarg + ": expected -a with one of ";
for (auto imple : simdjson::available_implementations) {
if(imple->supported_by_runtime_system()) {
exit_message += imple->name();
exit_message += " ";
}
}
exit_usage(exit_message);
}
simdjson::active_implementation = impl;

@ -1 +1 @@
Subproject commit 794c975287355de48158d9a80ed502d26b20a472
Subproject commit 4b63c333a842295b1bfb79d05863633037328300

View File

@ -6,6 +6,7 @@ CPU Architecture-Specific Implementations
* [Inspecting the Detected Implementation](#inspecting-the-detected-implementation)
* [Querying Available Implementations](#querying-available-implementations)
* [Manually Selecting the Implementation](#manually-selecting-the-implementation)
* [Checking that an Implementation can Run on your System](#checking-that-an-implementation-can-run-on-your-system)
Overview
--------
@ -70,6 +71,14 @@ And look them up by name:
```c++
cout << simdjson::available_implementations["fallback"]->description() << endl;
```
Though the fallback implementation should always be available, others might be missing. When
an implementation is not available, the bracket call `simdjson::available_implementations[name]`
will return the null pointer.
The available implementations have been compiled, but they cannot necessarily be run safely on your system:
see [Checking that an Implementation can Run on your System](#checking-that-an-implementation-can-run-on-your-system).
Manually Selecting the Implementation
-------------------------------------
@ -81,3 +90,30 @@ can select the CPU architecture yourself:
// Use the fallback implementation, even though my machine is fast enough for anything
simdjson::active_implementation = simdjson::available_implementations["fallback"];
```
You are responsible for ensuring that the requirements of the selected implementation match your current system.
Furthermore, you should check that the implementation is available, by comparing it with the null pointer,
before assigning it to `simdjson::active_implementation`.
```c++
auto my_implementation = simdjson::available_implementations["haswell"];
if(! my_implementation) { exit(1); }
if(! my_implementation->supported_by_runtime_system()) { exit(1); }
simdjson::active_implementation = my_implementation;
```
Checking that an Implementation can Run on your System
-------------------------------------
You should call `supported_by_runtime_system()` to compare the processor's features with the needs of the implementation.
```c++
for (auto implementation : simdjson::available_implementations) {
if(implementation->supported_by_runtime_system()) {
cout << implementation->name() << ": " << implementation->description() << endl;
}
}
```
The call to `supported_by_runtime_system()` may be relatively expensive. Do not call `supported_by_runtime_system()` each
time you parse a JSON input (for example). It is meant to be called a handful of times at most in the life of a program.

View File

@ -67,7 +67,13 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
// make this dynamic, so it works regardless of how it was compiled
// or what hardware it runs on
constexpr std::size_t Nimplementations_max=3;
const std::size_t Nimplementations=simdjson::available_implementations.size();
std::size_t Nimplementations = 0;
for(auto impl : simdjson::available_implementations) {
if(impl->supported_by_runtime_system()) {
Nimplementations++;
}
}
if(Nimplementations>Nimplementations_max) {
//there is another backend added, please bump Nimplementations_max!
std::abort();
@ -78,9 +84,11 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
{
std::size_t i=0;
for(auto& e: simdjson::available_implementations) {
if(e->supported_by_runtime_system()) {
implementations[i++].impl=e;
}
}
}
// let each implementation parse and store the result
std::size_t nerrors=0;

View File

@ -32,16 +32,19 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
};
auto first=simdjson::available_implementations.begin();
auto last=simdjson::available_implementations.end();
auto const first = simdjson::available_implementations.begin();
auto const last = simdjson::available_implementations.end();
//make sure there is an implementation
assert(first!=last);
auto it = first;
while((it != last) && (!(*it)->supported_by_runtime_system())) { it++; }
assert(it != last);
const auto reference=minify(*first);
bool failed=false;
for(auto it=first+1;it!=last; ++it) {
for(;it != last; ++it) {
if(!(*it)->supported_by_runtime_system()) { continue; }
const auto current=minify(*it);
if(current!=reference) {
failed=true;
@ -50,7 +53,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
if(failed) {
std::cerr<<std::boolalpha<<"Mismatch between implementations of minify() found:\n";
for(auto it=first;it!=last; ++it) {
for(it = first;it != last; ++it) {
if(!(*it)->supported_by_runtime_system()) { continue; }
const auto current=minify(*it);
std::string tmp(current.begin(),current.end());
std::cerr<<(*it)->name()<<" returns "<<tmp<<std::endl;

View File

@ -18,16 +18,20 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
};
auto first=simdjson::available_implementations.begin();
auto last=simdjson::available_implementations.end();
auto first = simdjson::available_implementations.begin();
auto last = simdjson::available_implementations.end();
//make sure there is an implementation
assert(first!=last);
const bool reference=utf8verify(*first);
auto it = first;
while((it != last) && (!(*it)->supported_by_runtime_system())) { it++; }
assert(it != last);
const bool reference=utf8verify(*it);
bool failed=false;
for(auto it=first+1;it!=last; ++it) {
for(; it != last; ++it) {
if(!(*it)->supported_by_runtime_system()) { continue; }
const bool current=utf8verify(*it);
if(current!=reference) {
failed=true;
@ -36,7 +40,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
if(failed) {
std::cerr<<std::boolalpha<<"Mismatch between implementations of validate_utf8() found:\n";
for(auto it=first;it!=last; ++it) {
for(it = first;it != last; ++it) {
if(!(*it)->supported_by_runtime_system()) { continue; }
const bool current=utf8verify(*it);
std::cerr<<(*it)->name()<<" returns "<<current<<std::endl;
}

View File

@ -72,6 +72,16 @@ public:
*/
virtual const std::string &description() const { return _description; }
/**
* Checks whether the instruction sets this implementation is compiled against
* are all supported by the current CPU. This function may poll the current CPU/system
* and should therefore not be called too often if performance is a concern.
*
*
* @return true if the implementation can be safely used on the current system (determined at runtime)
*/
bool supported_by_runtime_system() const;
/**
* @private For internal implementation use
*
@ -180,6 +190,7 @@ public:
*
* const implementation *impl = simdjson::available_implementations["westmere"];
* if (!impl) { exit(1); }
* if (!impl->supported_by_runtime_system()) { exit(1); }
* simdjson::active_implementation = impl;
*
* @param name the implementation to find, e.g. "westmere", "haswell", "arm64"

View File

@ -30,6 +30,13 @@ namespace simdjson { namespace internal { const fallback::implementation fallbac
#endif // SIMDJSON_IMPLEMENTATION_FALLBACK
namespace simdjson {
/**
 * Reports whether this implementation can be used on the current system.
 *
 * The bitset returned by required_instruction_sets() is checked against the
 * bitset returned by detect_supported_architectures().
 *
 * @return true when no required instruction-set bit is absent from the
 *         detected set, false otherwise.
 */
bool implementation::supported_by_runtime_system() const {
  const uint32_t needed = this->required_instruction_sets();
  const uint32_t detected = detect_supported_architectures();
  // Any bit that is needed but not detected makes the implementation unusable.
  const uint32_t missing = needed & ~detected;
  return missing == 0;
}
namespace internal {
/**

View File

@ -1868,6 +1868,10 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "Unsupported architecture value -a %s\n", optarg);
return EXIT_FAILURE;
}
if(!impl->supported_by_runtime_system()) {
fprintf(stderr, "The selected implementation does not match your current CPU: -a %s\n", optarg);
return EXIT_FAILURE;
}
simdjson::active_implementation = impl;
break;
}

View File

@ -340,6 +340,10 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "Unsupported architecture value -a %s\n", optarg);
return EXIT_FAILURE;
}
if(!impl->supported_by_runtime_system()) {
fprintf(stderr, "The selected implementation does not match your current CPU: -a %s\n", optarg);
return EXIT_FAILURE;
}
simdjson::active_implementation = impl;
break;
}

View File

@ -202,6 +202,10 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "Unsupported architecture value -a %s\n", optarg);
return EXIT_FAILURE;
}
if(!impl->supported_by_runtime_system()) {
fprintf(stderr, "The selected implementation does not match your current CPU: -a %s\n", optarg);
return EXIT_FAILURE;
}
simdjson::active_implementation = impl;
break;
}

View File

@ -119,6 +119,10 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "Unsupported architecture value -a %s\n", optarg);
return EXIT_FAILURE;
}
if(!impl->supported_by_runtime_system()) {
fprintf(stderr, "The selected implementation does not match your current CPU: -a %s\n", optarg);
return EXIT_FAILURE;
}
simdjson::active_implementation = impl;
break;
}

View File

@ -117,6 +117,10 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "Unsupported architecture value -a %s\n", optarg);
return EXIT_FAILURE;
}
if(!impl->supported_by_runtime_system()) {
fprintf(stderr, "The selected implementation does not match your current CPU: -a %s\n", optarg);
return EXIT_FAILURE;
}
simdjson::active_implementation = impl;
break;
}

View File

@ -201,10 +201,24 @@ void implementation_selection_2() {
}
}
// Prints "name: description" for every available implementation that
// reports itself as supported by the runtime system; the others are skipped.
void implementation_selection_2_safe() {
  for (auto candidate : simdjson::available_implementations) {
    if (!candidate->supported_by_runtime_system()) { continue; }
    cout << candidate->name() << ": " << candidate->description() << endl;
  }
}
// Looks up the implementation registered under the name "fallback" and
// prints its description.
void implementation_selection_3() {
  const auto fallback_impl = simdjson::available_implementations["fallback"];
  cout << fallback_impl->description() << endl;
}
// Selects the "haswell" implementation as the active one, but only after
// confirming that the lookup succeeded and that the implementation reports
// itself as supported by the runtime system; otherwise exits with status 1.
void implementation_selection_safe() {
  const auto chosen = simdjson::available_implementations["haswell"];
  // Short-circuit keeps the null check ahead of the member call.
  if (!chosen || !chosen->supported_by_runtime_system()) {
    exit(1);
  }
  simdjson::active_implementation = chosen;
}
void implementation_selection_4() {
// Use the fallback implementation, even though my machine is fast enough for anything
simdjson::active_implementation = simdjson::available_implementations["fallback"];

View File

@ -9,7 +9,11 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
#include "cxxopts.hpp"
SIMDJSON_POP_DISABLE_WARNINGS
#if CXXOPTS__VERSION_MAJOR < 3
int main(int argc, char *argv[]) {
#else
int main(int argc, const char *argv[]) {
#endif
#ifdef __cpp_exceptions
try {
#endif

View File

@ -188,8 +188,11 @@ stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
recurse(doc, s, 0);
return s;
}
#if CXXOPTS__VERSION_MAJOR < 3
int main(int argc, char *argv[]) {
#else
int main(int argc, const char *argv[]) {
#endif
#ifdef __cpp_exceptions
try {
#endif

View File

@ -20,7 +20,11 @@ void usage(std::string message) {
std::cerr << options.help() << std::endl;
}
#if CXXOPTS__VERSION_MAJOR < 3
int main(int argc, char *argv[]) {
#else
int main(int argc, const char *argv[]) {
#endif
#ifdef __cpp_exceptions
try {
#endif
@ -28,8 +32,10 @@ int main(int argc, char *argv[]) {
ss << "Parser implementation (by default, detects the most advanced implementation supported on the host machine)." << std::endl;
ss << "Available parser implementations:" << std::endl;
for (auto impl : simdjson::available_implementations) {
if(impl->supported_by_runtime_system()) {
ss << "-a " << std::left << std::setw(9) << impl->name() << " - Use the " << impl->description() << " parser implementation." << std::endl;
}
}
options.add_options()
("a,arch", ss.str(), cxxopts::value<std::string>())
("f,file", "File name.", cxxopts::value<std::string>())
@ -54,6 +60,10 @@ int main(int argc, char *argv[]) {
usage("Unsupported implementation.");
return EXIT_FAILURE;
}
if(!impl->supported_by_runtime_system()) {
usage("The selected implementation does not match your current CPU.");
return EXIT_FAILURE;
}
simdjson::active_implementation = impl;
}