Add experimental libFuzzer targets: fuzz_parser, fuzz_minify and fuzz_dump.

Summary of the changes carried by this patch:
  * CMakeLists.txt: new ENABLE_FUZZING option (default OFF); when ON, the
    fuzz/ subdirectory is added to the build.
  * fuzz/CMakeLists.txt: builds the three fuzzers, only when the C++
    compiler id matches "Clang", and links each with -fsanitize=fuzzer.
  * fuzz/fuzz_parser.cpp: feeds the fuzzer input to build_parsed_json().
  * fuzz/fuzz_minify.cpp: copies the input into a std::string and minifies
    it in place with json_minify().
  * fuzz/fuzz_dump.cpp: parses the input and, if the iterator is ok, walks
    the document while printing into a discarding stream.
  * tools/cmake/FindOptions.cmake: CMAKE_C_FLAGS / CMAKE_CXX_FLAGS now keep
    their previous value instead of overwriting it (presumably so that the
    CFLAGS/CXXFLAGS exported in the fuzz/CMakeLists.txt instructions take
    effect -- confirm).

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
  * The <...> operands of the system #include lines were missing and have
    been restored to the headers the code needs (size_t, uint8_t,
    std::ostream/std::streambuf, std::string); verify against the author's
    intent.
  * Indentation depth and the position of blank context lines were inferred
    from the hunk line counts; verify the context against the target tree
    before applying (the tools/cmake/FindOptions.cmake hunk in particular).

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5d1dc0b9..341e8e66 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -48,6 +48,12 @@ add_subdirectory(tools)
 add_subdirectory(tests)
 add_subdirectory(benchmark)
 
+# for fuzzing, read the comments in the fuzz/CMakeLists.txt file
+option(ENABLE_FUZZING "enable fuzzing (experimental, requires clang)" OFF)
+if(ENABLE_FUZZING)
+add_subdirectory(fuzz)
+endif()
+
 set(CPACK_PACKAGE_VENDOR "Daniel Lemire")
 set(CPACK_PACKAGE_CONTACT "lemire@gmail.com")
 set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Parsing gigabytes of JSON per second")
@@ -62,3 +68,5 @@ set(CPACK_RPM_PACKAGE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
 set(CPACK_SOURCE_GENERATOR "TGZ;ZIP")
 
 include(CPack)
+
+
diff --git a/fuzz/CMakeLists.txt b/fuzz/CMakeLists.txt
new file mode 100644
index 00000000..57143b0d
--- /dev/null
+++ b/fuzz/CMakeLists.txt
@@ -0,0 +1,27 @@
+# First attempt at a fuzzer, using libFuzzer.
+#
+# compile like this:
+# mkdir build-fuzzer
+# cd build-fuzzer
+# export LDFLAGS="-fsanitize=address,undefined"
+# export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined"
+# export CFLAGS="-fsanitize=fuzzer-no-link,address,undefined"
+# export CXX=clang++
+# export CC=clang++
+# cmake .. -GNinja -DCMAKE_BUILD_TYPE=Debug -DENABLE_FUZZING=On
+# ninja
+
+if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  add_executable(fuzz_parser fuzz_parser.cpp)
+  target_link_libraries(fuzz_parser PRIVATE ${SIMDJSON_LIB_NAME} )
+  target_link_libraries(fuzz_parser PRIVATE "-fsanitize=fuzzer")
+
+  add_executable(fuzz_minify fuzz_minify.cpp)
+  target_link_libraries(fuzz_minify PRIVATE ${SIMDJSON_LIB_NAME} )
+  target_link_libraries(fuzz_minify PRIVATE "-fsanitize=fuzzer")
+
+  add_executable(fuzz_dump fuzz_dump.cpp)
+  target_link_libraries(fuzz_dump PRIVATE ${SIMDJSON_LIB_NAME} )
+  target_link_libraries(fuzz_dump PRIVATE "-fsanitize=fuzzer")
+endif()
+
diff --git a/fuzz/fuzz_dump.cpp b/fuzz/fuzz_dump.cpp
new file mode 100644
index 00000000..e842c43c
--- /dev/null
+++ b/fuzz/fuzz_dump.cpp
@@ -0,0 +1,72 @@
+#include "simdjson/jsonparser.h"
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+#include <string>
+
+// from https://stackoverflow.com/a/8244052
+class NulStreambuf : public std::streambuf {
+  char dummyBuffer[64];
+
+protected:
+  virtual int overflow(int c) {
+    setp(dummyBuffer, dummyBuffer + sizeof(dummyBuffer));
+    return (c == traits_type::eof()) ? '\0' : c;
+  }
+};
+
+class NulOStream : private NulStreambuf, public std::ostream {
+public:
+  NulOStream() : std::ostream(this) {}
+  NulStreambuf *rdbuf() { return this; }
+};
+
+// from the README on the front page
+void compute_dump(simdjson::ParsedJson::Iterator &pjh) {
+  NulOStream os;
+
+  if (pjh.is_object()) {
+    os << "{";
+    if (pjh.down()) {
+      pjh.print(os); // must be a string
+      os << ":";
+      pjh.next();
+      compute_dump(pjh); // let us recurse
+      while (pjh.next()) {
+        os << ",";
+        pjh.print(os);
+        os << ":";
+        pjh.next();
+        compute_dump(pjh); // let us recurse
+      }
+      pjh.up();
+    }
+    os << "}";
+  } else if (pjh.is_array()) {
+    os << "[";
+    if (pjh.down()) {
+      compute_dump(pjh); // let us recurse
+      while (pjh.next()) {
+        os << ",";
+        compute_dump(pjh); // let us recurse
+      }
+      pjh.up();
+    }
+    os << "]";
+  } else {
+    pjh.print(os); // just print the lone value
+  }
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+
+  try {
+    auto pj = simdjson::build_parsed_json(Data, Size);
+    simdjson::ParsedJson::Iterator pjh(pj);
+    if (pjh.is_ok()) {
+      compute_dump(pjh);
+    }
+  } catch (...) {
+  }
+  return 0;
+}
diff --git a/fuzz/fuzz_minify.cpp b/fuzz/fuzz_minify.cpp
new file mode 100644
index 00000000..ab6bc165
--- /dev/null
+++ b/fuzz/fuzz_minify.cpp
@@ -0,0 +1,15 @@
+#include "simdjson/jsonminifier.h"
+#include "simdjson/jsonparser.h"
+#include <cstddef>
+#include <cstdint>
+#include <string>
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+
+  auto begin = (const char *)Data;
+  auto end = begin + Size;
+
+  std::string str(begin, end);
+
+  simdjson::json_minify(str.data(), str.size(), str.data());
+  return 0;
+}
diff --git a/fuzz/fuzz_parser.cpp b/fuzz/fuzz_parser.cpp
new file mode 100644
index 00000000..c1b11b7e
--- /dev/null
+++ b/fuzz/fuzz_parser.cpp
@@ -0,0 +1,10 @@
+#include "simdjson/jsonparser.h"
+#include <cstddef>
+#include <cstdint>
+#include <string>
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+
+  auto ignored = simdjson::build_parsed_json(Data, Size);
+
+  return 0;
+}
diff --git a/tools/cmake/FindOptions.cmake b/tools/cmake/FindOptions.cmake
index 281b5258..f9854c1c 100644
--- a/tools/cmake/FindOptions.cmake
+++ b/tools/cmake/FindOptions.cmake
@@ -46,13 +46,13 @@ set(CMAKE_C_FLAGS_DEBUG "-ggdb")
 set(CMAKE_C_FLAGS_RELEASE "-O3")
 endif()
 
-set(CMAKE_C_FLAGS "${STD_FLAGS} ${OPT_FLAGS} ${INCLUDE_FLAGS} ${WARNING_FLAGS} ${SIMDJSON_SANITIZE_FLAGS} ")
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${STD_FLAGS} ${OPT_FLAGS} ${INCLUDE_FLAGS} ${WARNING_FLAGS} ${SIMDJSON_SANITIZE_FLAGS} ")
 if(NOT MSVC)
 set(CMAKE_CXX_FLAGS_DEBUG "-ggdb")
 set(CMAKE_CXX_FLAGS_RELEASE "-O3")
 endif()
 
-set(CMAKE_CXX_FLAGS "${CXXSTD_FLAGS} ${OPT_FLAGS} ${INCLUDE_FLAGS} ${WARNING_FLAGS} ${SIMDJSON_SANITIZE_FLAGS} ")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXXSTD_FLAGS} ${OPT_FLAGS} ${INCLUDE_FLAGS} ${WARNING_FLAGS} ${SIMDJSON_SANITIZE_FLAGS} ")
 
 if(MSVC)
 add_definitions( "${OPT_FLAGS} /W3 /D_CRT_SECURE_NO_WARNINGS /wd4005 /wd4996 /wd4267 /wd4244 /wd4113 /nologo")