Merge branch 'master' into jkeiser/ondemand-scalar-order

2021-01-13 14:21:16 -05:00 · 2021-01-13 14:21:16 -05:00 · 990da22249
parent be61650102 3849cc400e
commit 990da22249
22 changed files with 120 additions and 37 deletions
--- a/.github/workflows/alpine.yml
+++ b/.github/workflows/alpine.yml
@ -10,6 +10,9 @@ on:

 jobs:
  ubuntu-build:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
--- a/.github/workflows/fuzzers.yml
+++ b/.github/workflows/fuzzers.yml
@ -11,7 +11,10 @@ on:
    - cron: 23 */8 * * *

 jobs:
-  build:    
+  build:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    runs-on: ubuntu-latest
    env:
      # fuzzers that change behaviour with SIMDJSON_FORCE_IMPLEMENTATION
--- a/.github/workflows/mingw-ci.yml
+++ b/.github/workflows/mingw-ci.yml
@ -15,6 +15,9 @@ on:

 jobs:
  ci:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    name: windows-gcc
    runs-on: windows-2016

--- a/.github/workflows/mingw64-ci.yml
+++ b/.github/workflows/mingw64-ci.yml
@ -15,6 +15,9 @@ on:

 jobs:
  ci:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    name: windows-gcc
    runs-on: windows-2016

--- a/.github/workflows/msys2.yml
+++ b/.github/workflows/msys2.yml
@ -10,6 +10,9 @@ on:

 jobs:
  windows-mingw:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    name: ${{ matrix.msystem }}
    runs-on: windows-latest
    defaults:
--- a/.github/workflows/power-fuzz.yml
+++ b/.github/workflows/power-fuzz.yml
@ -8,6 +8,9 @@ on:

 jobs:
  armv7_job:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    # The host should always be Linux
    runs-on: ubuntu-20.04
    name: Build on ubuntu-20.04 ppc64le
--- a/.github/workflows/ubuntu18-checkperf.yml
+++ b/.github/workflows/ubuntu18-checkperf.yml
@ -10,6 +10,9 @@ on:

 jobs:
  ubuntu-build:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    runs-on: ubuntu-18.04
    steps:
      - uses: actions/checkout@v2
--- a/.github/workflows/ubuntu18-threadsani.yml
+++ b/.github/workflows/ubuntu18-threadsani.yml
@ -10,6 +10,9 @@ on:

 jobs:
  ubuntu-build:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    runs-on: ubuntu-18.04
    steps:
      - uses: actions/checkout@v2
--- a/.github/workflows/ubuntu18.yml
+++ b/.github/workflows/ubuntu18.yml
@ -10,6 +10,9 @@ on:

 jobs:
  ubuntu-build:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    runs-on: ubuntu-18.04
    steps:
      - uses: actions/checkout@v2
--- a/.github/workflows/ubuntu20-checkperf.yml
+++ b/.github/workflows/ubuntu20-checkperf.yml
@ -10,6 +10,9 @@ on:

 jobs:
  ubuntu-build:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v2
--- a/.github/workflows/ubuntu20-threadsani.yml
+++ b/.github/workflows/ubuntu20-threadsani.yml
@ -10,6 +10,9 @@ on:

 jobs:
  ubuntu-build:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v2
--- a/.github/workflows/ubuntu20.yml
+++ b/.github/workflows/ubuntu20.yml
@ -10,6 +10,9 @@ on:

 jobs:
  ubuntu-build:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v2
--- a/.github/workflows/vs16-ci.yml
+++ b/.github/workflows/vs16-ci.yml
@ -10,6 +10,9 @@ on:

 jobs:
  ci:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    name: windows-vs16
    runs-on: windows-latest
    steps:
--- a/.github/workflows/vs16-clang-ci.yml
+++ b/.github/workflows/vs16-clang-ci.yml
@ -10,6 +10,9 @@ on:

 jobs:
  ci:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    name: windows-vs16
    runs-on: windows-latest
    steps:
--- a/.github/workflows/vs16-ninja-ci.yml
+++ b/.github/workflows/vs16-ninja-ci.yml
@ -10,6 +10,9 @@ on:

 jobs:
  ci:
+    if: >-
+      ! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
+      ! contains(toJSON(github.event.commits.*.message), '[skip github]')
    name: windows-vs16
    runs-on: windows-latest
    steps:
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -70,6 +70,8 @@ Pull requests are always invited. However, we ask that you follow these guidelin
 - Changes should be focused and minimal. You should change as few lines of code as possible. Please do not reformat or touch files needlessly.
 - New features must be accompanied by new tests, in general.
 - Your code should pass our continuous-integration tests. It is your responsibility to ensure that your proposal passes the tests. We do not merge pull requests that would break our build.
+   - An exception to this would be changes to non-code files, such as documentation and assets, or trivial changes to code, such as comments, where it is encouraged to explicitly ask for skipping a CI run using the `[skip ci]` prefix in your Pull Request title **and** in the first line of the most recent commit in a push. Example for such a commit: `[skip ci] Fixed typo in power_of_ten's docs`
+   This benefits the project: the CI pipeline is not burdened by running jobs on changes that do not alter the behavior of the code, which reduces wait times for other Pull Requests that do change behavior and require testing.

 If the benefits of your proposed code remain unclear, we may choose to discard your code: that is not an insult, we frequently discard our own code. We may also consider various alternatives and choose another path. Again, that is not an insult or a sign that you have wasted your time.

--- a/doc/ondemand.md
+++ b/doc/ondemand.md
@ -1,18 +1,18 @@
 On Demand Basics
 ================

-On Demand is a new, faster simdjson API with all the ease-of-use you're used to. While it provides a
-familiar DOM interface, under the hood it is anything but: it is parsing values *as you use them.*
-This means you don't waste time parsing JSON you don't use, and you don't pay the cost of generating
+On Demand is a new, faster simdjson API with all the ease-of-use you are used to. While it provides a
+familiar DOM interface, under the hood it is different: it is parsing values *as you use them.*
+With On Demand, you do not waste time parsing JSON you do not use, and you do not pay the cost of generating
 an intermediate DOM tree.

-An overview of what you need to know to use simdjson, with examples.
+We provide an overview of what you need to know to use the simdjson On Demand API, with examples.

-* [Including ondemand](#including-ondemand)
+* [Including On Demand](#including-on-demand)
 * [The Basics: Loading and Parsing JSON Documents](#the-basics-loading-and-parsing-json-documents)
 * [Using the Parsed JSON](#using-the-parsed-json)

-ondemand supports the same JSON standards and C++ compilers as simdjson's older DOM API. Refer to the DOM docs for more information:
+The On Demand API supports the same JSON standards and C++ compilers as simdjson's DOM API. Refer to the DOM docs for more information:

 * [Requirements](basics.md#requirements)
 * [Using simdjson as a CMake dependency](#using-simdjson-as-a-cmake-dependency)
@ -25,10 +25,10 @@ ondemand supports the same JSON standards and C++ compilers as simdjson's older
 * [C++17 Support](basics.md#c17-support)
 * [Backwards Compatibility](basics.md#backwards-compatibility)

-For deeper information about the design and implementation of simdjson's ondemand API, refer to
+For deeper information about the design and implementation of the simdjson On Demand API, refer to
 the [design document](ondemand_design.md).

-Including ondemand
+Including On Demand
 ------------------

 To include simdjson, copy [simdjson.h](/singleheader/simdjson.h) and [simdjson.cpp](/singleheader/simdjson.cpp)
@ -37,32 +37,20 @@ into your project. Then include it in your project with:
 ```c++
 #include "simdjson.h"
 using namespace simdjson; // optional
-using namespace simdjson::builtin; // optional, for ondemand
+using namespace simdjson::builtin; // optional, for On Demand
 ```

-You can compile with:
+You can generally compile with:

 ```
-c++ -march=native myproject.cpp simdjson.cpp
+c++ -O3 myproject.cpp simdjson.cpp
 ```

+
 Note:
 - Users on macOS and other platforms where the compiler is not C++11 compliant by default
 should request a suitable standard with the appropriate flag (e.g., `c++ -march=native -std=c++17 myproject.cpp simdjson.cpp`).

-### The native architecture flag
-
-Passing `-march=native` to the compiler makes On Demand much faster by allowing it to use
-optimizations specific to your machine. You cannot do this, however, if you are compiling code
-that might be run on less advanced machines.
-
-On Demand uses advanced architecture-specific code for many common processors to make JSON
-preprocessing and string parsing faster. By default, however, most c++ compilers will compile to the
-least common denominator (since the program could theoretically be run anywhere). Since On Demand is
-inlined into your own code, it cannot use these advanced versions unless the compiler is told to
-target them. -march=native says "target the current computer," which is a reasonable default for
-many applications which both compile and run on the same processor.
-
 The Basics: Loading and Parsing JSON Documents
 ----------------------------------------------

@ -90,8 +78,8 @@ Documents Are Iterators
 A `document` is *not* a fully-parsed JSON value; rather, it is an **iterator** over the JSON text.
 This means that while you iterate an array, or search for a field in an object, it is actually
 walking through the original JSON text, merrily reading commas and colons and brackets to make sure
-you get where you're going. This is the key to On Demand's performance: since it's just an iterator,
-it lets you parse values as you use them. And particularly, it lets you *skip* values you don't want
+you get where you are going. This is the key to On Demand's performance: since it's just an iterator,
+it lets you parse values as you use them. And particularly, it lets you *skip* values you do not want
 to use.

 ### Parser, Document and JSON Scope
@ -100,7 +88,7 @@ Because a document is an iterator over the JSON text, both the JSON text and the
 remain alive (in scope) while you are using it. Further, a `parser` may have at most
 one document open at a time, since it holds allocated memory used for the parsing.

-During the `iterate` call, the original JSON text is never modified--only read. After you're done
+During the `iterate` call, the original JSON text is never modified--only read. After you are done
 with the document, the source (whether file or string) can be safely discarded.

 For best performance, a `parser` instance should be reused over several files: otherwise you will
@ -120,7 +108,7 @@ support for users who avoid exceptions. See [the simdjson DOM API's error handli
  or the initial `[` or `{` will be verified. An exception is thrown if the cast is not possible.

  > IMPORTANT NOTE: values can only be parsed once. Since documents are *iterators*, once you have
-  > parsed a value (such as by casting to double), you can't get at it again.
+  > parsed a value (such as by casting to double), you cannot get at it again.
 * **Field Access:** To get the value of the "foo" field in an object, use `object["foo"]`. This will
  scan through the object looking for the field with the matching string.

--- a/doc/ondemand_design.md
+++ b/doc/ondemand_design.md
@ -721,7 +721,7 @@ The On Demand approach has some limitations:

 There are currently additional technical limitations which we expect to resolve in future releases of the simdjson library:

-* The simdjson library offers runtime dispatching which allows you to compile one binary and have it run at full speed on different processors, taking advantage of the specific features of the processor. The On Demand API does not have runtime dispatch support at this time. To benefit from the On Demand API, you must compile your code for a specific processor. E.g., if your processor supports AVX2 instructions, you should compile your binary executable with AVX2 instruction support (by using your compiler's commands). If you are sufficiently technically proficient, you can implement runtime dispatching within your application, by compiling your On Demand code for different processors.
+* The simdjson library offers runtime dispatching which allows you to compile one binary and have it run at full speed on different processors, taking advantage of the specific features of the processor. The On Demand API has limited runtime dispatch support. Under x64 systems, to fully benefit from the On Demand API, we recommend that you compile your code for a specific processor. E.g., if your processor supports AVX2 instructions, you should compile your binary executable with AVX2 instruction support (by using your compiler's commands). If you are sufficiently technically proficient, you can implement runtime dispatching within your application, by compiling your On Demand code for different processors.
 * There is an initial phase which scans the entire document quickly, irrespective of the size of the document. We plan to break this phase into distinct steps for large files in a future release as we have done with other components of our API (e.g., `parse_many`).
 * The On Demand API does not support JSON Pointer. This capability is currently limited to our core API.

@ -737,23 +737,42 @@ At this time we recommend the On Demand API in the following cases:
 Good applications for the On Demand API might be:

 * You are working from pre-existing large JSON files that have been vetted. You expect them to be well formed according to a known JSON dialect and to have a consistent layout. For example, you might be doing biomedical research or machine learning on top of static data dumps in JSON.
-* You have a closed system on predetermined hardware. Both the generation and the consumption of JSON data is within your system. Your team controls both the software that produces the JSON and the software the parses it, your team knows and control the hardware. Thus you can fully test your system.
+* Both the generation and the consumption of JSON data are within your system. Your team controls both the software that produces the JSON and the software that parses it, and your team knows and controls the hardware. Thus you can fully test your system.
 * You are working with stable JSON APIs which have a consistent layout and JSON dialect.

-## Checking Your CPU Selection
+## Checking Your CPU Selection (x64 systems)

-Given that the On Demand API does not offer runtime dispatching, your code is compiled against a specific CPU target. You should
-verify that the code is compiled against the target you expect: `haswell` (AVX2 x64 processors), `westmere` (SSE4 x64 processors), `arm64` (64-bit ARM), `ppc64` (64-bit POWER), `fallback` (others). Under x64 processors, many programmers will want to target `haswell` whereas under ARM,
-most programmers will want to target `arm64`. The `fallback` is probably only good for testing purposes, not for deployment.
+The On Demand API uses advanced architecture-specific code for many common processors to make JSON preprocessing and string parsing faster. By default, however, most C++ compilers will compile to the least common denominator (since the program could theoretically be run anywhere). Since On Demand is inlined into your own code, it cannot always use these advanced versions unless the compiler is told to target them.
+
+On relevant systems, the On Demand API provides some support for runtime dispatching: that is, it will attempt to detect, at runtime, the instructions that your processor supports and optimize the code accordingly. However, it cannot always make full use of the features of your processor.
+
+Some users wish to run at the best possible speed. Under recent Intel and AMD processors, these users should take additional steps to verify that their code is well optimized.
+
+Given that the On Demand API offers limited runtime dispatching, it matters that your code is compiled against a specific CPU target. You should verify that the code is compiled against the target you expect. Thankfully, the simdjson library will tell you exactly what it detects as an implementation: `haswell` (AVX2 x64 processors), `westmere` (SSE4 x64 processors), `arm64` (64-bit ARM), `ppc64` (64-bit POWER), `fallback` (others). Under x64 processors, many programmers will want to target `haswell` whereas under ARM, most programmers will want to target `arm64` (and it should do so automatically). The `fallback` is probably only good for testing purposes, not for deployment.

 ```C++
  std::cout << simdjson::builtin_implementation()->name() << std::endl;
 ```

-If you are using CMake for your C++ project, then you can pass compilation flags to your compiler by using
-the `CMAKE_CXX_FLAGS` variable:
+If the `simdjson::builtin_implementation()->name()` call does not return the architecture you wish to target, you may need to pass flags to your compiler.
+
+If you are using CMake for your C++ project, then you can pass compilation flags to your compiler by using the `CMAKE_CXX_FLAGS` variable:

 ```
 cmake  -DCMAKE_CXX_FLAGS="-march=haswell" -B build_haswell
 cmake --build build_haswell
 ```
+
+You can also pass the flags directly to your compiler when compiling 'by hand':
+
+````
+c++ -march=haswell -O3 myproject.cpp simdjson.cpp
+````
+
+In these examples, the `-march=haswell` flag targets a haswell processor and the resulting binary will run on processors that support all features of the haswell processors.
+
+Instead of specifying a specific microarchitecture, you can let your compiler do the work. The `-march=native` flag says "target the current computer," which is a reasonable default for many applications which both compile and run on the same processor.
+
+Passing `-march=native` to the compiler may make On Demand faster by allowing it to use optimizations specific to your machine. You cannot do this, however, if you are compiling code that might be run on less advanced machines. That is, be mindful that when compiling with the `-march=native` flag, the resulting binary will run on the current system but may not run on other systems (e.g., on an old processor).
+
+If you are compiling on an ARM or POWER system, you do not need to be concerned with CPU selection during compilation. The `-march=native` flag is useful for best performance on x64 (e.g., Intel) systems, but it is generally unsupported on some platforms such as ARM (aarch64) or POWER.
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -31,10 +31,12 @@ add_cpp_test(minefieldcheck             LABELS dom acceptance per_implementation
 add_cpp_test(parse_many_test            LABELS dom acceptance per_implementation)
 add_cpp_test(pointercheck               LABELS dom acceptance per_implementation) # https://tools.ietf.org/html/rfc6901
 add_cpp_test(unicode_tests              LABELS dom acceptance per_implementation)
+add_cpp_test(trivially_copyable_test    LABELS dom acceptance per_implementation)

 add_cpp_test(minify_tests        LABELS other acceptance per_implementation)
 add_cpp_test(padded_string_tests LABELS other acceptance                   )

+
 find_program(BASH bash)

 # Below we skip anything on Windows, not just visual studio, because running bash under Windows requires you to
--- a/tests/jsoncheck.cpp
+++ b/tests/jsoncheck.cpp
@ -65,6 +65,8 @@ bool validate(const char *dirname) {
      auto error = simdjson::padded_string::load(fullpath).get(p);
      if (error) {
        std::cerr << "Could not load the file " << fullpath << std::endl;
+        free(fullpath);
+        delete[] is_file_as_expected;
        return EXIT_FAILURE;
      }
      simdjson::dom::parser parser;
--- a/tests/minefieldcheck.cpp
+++ b/tests/minefieldcheck.cpp
@ -63,6 +63,8 @@ bool validate_minefield(const char *dirname) {
      auto error = simdjson::padded_string::load(fullpath).get(p);
      if (error) {
        std::cerr << "Could not load the file " << fullpath << std::endl;
+        free(fullpath);
+        delete[] is_file_as_expected;
        return EXIT_FAILURE;
      }
      simdjson::dom::parser parser;
--- a/tests/trivially_copyable_test.cpp
+++ b/tests/trivially_copyable_test.cpp
@ -0,0 +1,23 @@
+// This file is not part of our main, regular tests.
+#include "../singleheader/simdjson.h"
+#include <iostream>
+#include <type_traits>
+
+int main() { // Runtime check that three simdjson DOM types are trivially copyable.
+  if (!std::is_trivially_copyable<simdjson::dom::element>::value) { // dom::element
+    std::cerr << "simdjson::dom::element must be trivially copyable"
+              << std::endl;
+    return EXIT_FAILURE; // report the failure through the process exit status
+  }
+  if (!std::is_trivially_copyable<simdjson::dom::array>::value) { // dom::array
+    std::cerr << "simdjson::dom::array must be trivially copyable" << std::endl;
+    return EXIT_FAILURE;
+  }
+  if (!std::is_trivially_copyable<simdjson::dom::object>::value) { // dom::object
+    std::cerr << "simdjson::dom::object must be trivially copyable"
+              << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  return EXIT_SUCCESS; // all three traits held
+}