PPC64 support (#1254)

* Initial PPC64 support

* Add travis CI

* Fix outdated cmake version for travis

* Fix indendtation

* Try another workaround for outdated cmake in travis

* Try beta cmake

* Add dash before beta

* Use builtin snaps

* Use cmake as rocksdb

* Test cmake on bionic

* Remove unnecessary things from travis

* Remove unnecessary things from travis

* Another try of compiler install

* Add all major compilers

* Add all major compilers

* Add all major compilers

* Tweak travis a bit

* Typo

* More robust travis

* Typos typos typos

* Add fewer compilers, add non specific build for clang and gcc, should be the final config

* CMAKE_FLAGS is in incorrect place

* Remove default implementation

* Limit build thread number

* Fall back prefix_xor to a usual implementation, no performance boost is noticed

* Test for power9 as it is the main architecture for OpenPOWER right now

* Add to documentation to build with power9 as the implementation is compatible but compiler optimizations is not

* Replace ARM with PPC in the comment
This commit is contained in:
Danila Kutenin 2020-10-28 01:43:39 +03:00 committed by GitHub
parent 1fd0447dbb
commit f46a0f64f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 1246 additions and 128 deletions

View File

@ -1,30 +1,179 @@
language: cpp
sudo: false
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- gcc-7
- g++-7
- clang-format
- python
branches:
only:
- master
dist: bionic
arch:
- ppc64le
matrix:
include:
- os: linux
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-8
env:
- COMPILER="CC=gcc-8 && CXX=g++-8"
compiler: gcc-8
script:
- export CXX=g++-7
- export CC=gcc-7
- make
- make test
- make everything
- make amalgamate
- make clean
- make SANITIZEGOLD=1 test
- make clean
- ARCHFLAGS="-march=nehalem" make
- ARCHFLAGS="-march=nehalem" make test
- ARCHFLAGS="-march=nehalem" make everything
- ./style/run-clang-format.py -r include/ benchmark/ src/ tests/
- os: linux
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-9
env:
- COMPILER="CC=gcc-9 && CXX=g++-9"
compiler: gcc-9
- os: linux
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-10
env:
- COMPILER="CC=gcc-10 && CXX=g++-10"
compiler: gcc-10
- os: linux
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-10
env:
- COMPILER="CC=gcc-10 && CXX=g++-10"
- SANITIZE="on"
compiler: gcc-10-sanitize
- os: linux
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-10
env:
- COMPILER="CC=gcc-10 && CXX=g++-10"
- STATIC="on"
compiler: gcc-10-static
- os: linux
addons:
apt:
sources:
- llvm-toolchain-bionic-6.0
packages:
- clang-6.0
env:
- COMPILER="CC=clang-6.0 && CXX=clang++-6.0"
compiler: clang-6
- os: linux
addons:
apt:
sources:
- llvm-toolchain-bionic-7
packages:
- clang-7
env:
- COMPILER="CC=clang-7 && CXX=clang++-7"
compiler: clang-7
- os: linux
addons:
apt:
sources:
- llvm-toolchain-bionic-8
packages:
- clang-8
env:
- COMPILER="CC=clang-8 && CXX=clang++-8"
compiler: clang-8
- os: linux
addons:
apt:
sources:
- llvm-toolchain-bionic-9
packages:
- clang-9
env:
- COMPILER="CC=clang-9 && CXX=clang++-9"
compiler: clang-9
- os: linux
addons:
apt:
packages:
- clang-10
sources:
- ubuntu-toolchain-r-test
- sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main'
key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
env:
- COMPILER="CC=clang-10 && CXX=clang++-10"
compiler: clang-10
- os: linux
addons:
apt:
packages:
- clang-10
sources:
- ubuntu-toolchain-r-test
- sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main'
key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
env:
- COMPILER="CC=clang-10 && CXX=clang++-10"
- STATIC="on"
compiler: clang-10-static
- os: linux
addons:
apt:
packages:
- clang-10
sources:
- ubuntu-toolchain-r-test
- sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main'
key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
env:
- COMPILER="CC=clang-10 && CXX=clang++-10"
- SANITIZE="on"
compiler: clang-10-sanitize
before_install:
- eval "${COMPILER}"
install:
- if [[ "${TRAVIS_CPU_ARCH}" == "ppc64le" ]]; then
sudo apt-get install libuv1 rhash libstdc++6;
wget https://anaconda.org/conda-forge/cmake/3.17.0/download/linux-ppc64le/cmake-3.17.0-hfb1cb51_0.tar.bz2;
mkdir $HOME/cmake;
tar -xjf cmake-3.17.0-hfb1cb51_0.tar.bz2 -C $HOME/cmake;
export PATH=$HOME/cmake/bin:$PATH;
fi
- export CMAKE_CXX_FLAGS="-maltivec -mcpu=power9 -mtune=power9"
- export CMAKE_C_FLAGS="${CMAKE_CXX_FLAGS}"
- export CMAKE_FLAGS="-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} -DSIMDJSON_IMPLEMENTATION=ppc64;fallback";
- if [[ "${SANITIZE}" == "on" ]]; then
export CMAKE_FLAGS="${CMAKE_FLAGS} -DSIMDJSON_SANITIZE=ON";
export ASAN_OPTIONS="detect_leaks=0";
fi
- if [[ "${STATIC}" == "on" ]]; then
export CMAKE_FLAGS="${CMAKE_FLAGS} -DSIMDJSON_BUILD_STATIC=ON";
fi
- export CTEST_FLAGS="-j4 --output-on-failure -E checkperf"
script:
- mkdir build
- cd build
- cmake $CMAKE_FLAGS ..
- cmake --build . -- -j2
- SIMDJSON_FORCE_IMPLEMENTATION=ppc64 ctest $CTEST_FLAGS -L per_implementation
- SIMDJSON_FORCE_IMPLEMENTATION=fallback ctest $CTEST_FLAGS -L per_implementation
- ctest $CTEST_FLAGS -LE "acceptance|per_implementation"

View File

@ -48,7 +48,7 @@ simdjson's source structure, from the top level, looks like this:
implementations).
* simdjson.cpp: A "main source" that includes all implementation files from src/. This is
equivalent to the distributed simdjson.cpp.
* arm64/|fallback/|haswell/|westmere/: Architecture-specific implementations. All functions are
* arm64/|fallback/|haswell/|ppc64/|westmere/: Architecture-specific implementations.
Each architecture defines its own namespace, e.g. simdjson::haswell.
* generic/: Generic implementations of the simdjson parser. These files may be included and
compiled multiple times, from whichever architectures use them. They assume they are already
@ -67,7 +67,7 @@ Other important files and directories:
* **.circleci:** Definitions for Circle CI.
* **.github/workflows:** Definitions for GitHub Actions (CI).
* **singleheader:** Contains generated `simdjson.h` and `simdjson.cpp` that we release. The files `singleheader/simdjson.h` and `singleheader/simdjson.cpp` should never be edited by hand.
* **singleheader/amalgamate.sh:** Generates `singleheader/simdjson.h` and `singleheader/simdjson.cpp` for release (bash script).
* **singleheader/amalgamate.sh:** Generates `singleheader/simdjson.h` and `singleheader/simdjson.cpp` for release (bash script).
* **benchmark:** This is where we do benchmarking. Benchmarking is core to every change we make; the
cardinal rule is don't regress performance without knowing exactly why, and what you're trading
for it. Many of our benchmarks are microbenchmarks. We are effectively doing controlled scientific experiments for the purpose of understanding what affects our performance. So we simplify as much as possible. We try to avoid irrelevant factors such as page faults, interrupts, and unnecessary system calls. We recommend checking the performance as follows:
@ -251,7 +251,7 @@ We assume you have a common 64-bit Windows PC with at least Visual Studio 2019.
- Install [CMake](https://cmake.org/download/). When you install it, make sure to ask that `cmake` be made available from the command line. Please choose a recent version of cmake.
- Create a subdirectory within simdjson, such as `build`.
- Using a shell, go to this newly created directory. You can start a shell directly from GitHub Desktop (Repository > Open in Command Prompt).
- Type `cmake ..` in the shell while in the `build` directory.
- Type `cmake ..` in the shell while in the `build` directory.
- This last command (`cmake ..`) created a Visual Studio solution file in the newly created directory (e.g., `simdjson.sln`). Open this file in Visual Studio. You should now be able to build the project and run the tests. For example, in the `Solution Explorer` window (available from the `View` menu), right-click `ALL_BUILD` and select `Build`. To test the code, still in the `Solution Explorer` window, select `RUN_TESTS` and select `Build`.

View File

@ -63,8 +63,8 @@ void print_usage(ostream& out) {
out << "-s STAGE - Stop after the given stage." << endl;
out << " -s stage1 - Stop after find_structural_bits." << endl;
out << " -s all - Run all stages." << endl;
out << "-a ARCH - Use the parser with the designated architecture (HASWELL, WESTMERE" << endl;
out << " or ARM64). By default, detects best supported architecture." << endl;
out << "-a ARCH - Use the parser with the designated architecture (HASWELL, WESTMERE," << endl;
out << " PPC64 or ARM64). By default, detects best supported architecture." << endl;
}
void exit_usage(string message) {
@ -99,11 +99,11 @@ struct option_struct {
case 'a': {
auto impl = simdjson::available_implementations[optarg];
if(impl && impl->supported_by_runtime_system()) {
simdjson::active_implementation = impl;
} else {
simdjson::active_implementation = impl;
} else {
std::cerr << "implementation " << optarg << " not found or not supported " << std::endl;
}
}
}
break;
case 's':
if (!strcmp(optarg, "stage1")) {

View File

@ -1,6 +1,6 @@
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
message (STATUS "The simdjson repository appears to be standalone.")
message (STATUS "The simdjson repository appears to be standalone.")
option(SIMDJSON_JUST_LIBRARY "Build just the library, omit tests, tools and benchmarks" OFF)
message (STATUS "By default, we attempt to build everything.")
else()
@ -85,7 +85,7 @@ else()
target_compile_options(simdjson-internal-flags INTERFACE /WX /W3 /sdl /w34714) # https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-4-c4714?view=vs-2019
endif()
if(SIMDJSON_VISUAL_STUDIO_BUILD_WITH_DEBUG_INFO_FOR_PROFILING)
target_link_options(simdjson-flags INTERFACE /DEBUG )
target_link_options(simdjson-flags INTERFACE /DEBUG )
target_compile_options(simdjson-flags INTERFACE /Zi)
endif()
else()
@ -101,7 +101,7 @@ endif()
#
# Implementation selection
#
set(SIMDJSON_ALL_IMPLEMENTATIONS "fallback;westmere;haswell;arm64")
set(SIMDJSON_ALL_IMPLEMENTATIONS "fallback;westmere;haswell;arm64;ppc64")
set(SIMDJSON_IMPLEMENTATION "" CACHE STRING "Semicolon-separated list of implementations to include (${SIMDJSON_ALL_IMPLEMENTATIONS}). If this is not set, any implementations that are supported at compile time and may be selected at runtime will be included.")
foreach(implementation ${SIMDJSON_IMPLEMENTATION})
@ -110,7 +110,7 @@ foreach(implementation ${SIMDJSON_IMPLEMENTATION})
endif()
endforeach(implementation)
set(SIMDJSON_EXCLUDE_IMPLEMENTATION "" CACHE STRING "Semicolon-separated list of implementations to exclude (haswell/westmere/arm64/fallback). By default, excludes any implementations that are unsupported at compile time or cannot be selected at runtime.")
set(SIMDJSON_EXCLUDE_IMPLEMENTATION "" CACHE STRING "Semicolon-separated list of implementations to exclude (haswell/westmere/arm64/ppc64/fallback). By default, excludes any implementations that are unsupported at compile time or cannot be selected at runtime.")
foreach(implementation ${SIMDJSON_EXCLUDE_IMPLEMENTATION})
if(NOT (implementation IN_LIST SIMDJSON_ALL_IMPLEMENTATIONS))
message(ERROR "Implementation ${implementation} not supported by simdjson. Possible implementations: ${SIMDJSON_ALL_IMPLEMENTATIONS}")
@ -161,6 +161,11 @@ if(NOT SIMDJSON_IMPLEMENTATION_ARM64)
message(DEPRECATION "SIMDJSON_IMPLEMENTATION_ARM64 is deprecated. Use SIMDJSON_IMPLEMENTATION=-arm64 instead.")
target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_IMPLEMENTATION_ARM64=0)
endif()
# Legacy per-implementation switch for the ppc64 kernel. Turning it OFF emits a
# deprecation notice and defines SIMDJSON_IMPLEMENTATION_PPC64=0 on simdjson-flags.
option(SIMDJSON_IMPLEMENTATION_PPC64 "Include the ppc64 implementation" ON)
if(NOT SIMDJSON_IMPLEMENTATION_PPC64)
  message(DEPRECATION "SIMDJSON_IMPLEMENTATION_PPC64 is deprecated. Use SIMDJSON_IMPLEMENTATION=-ppc64 instead.")
  target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_IMPLEMENTATION_PPC64=0)
endif()
option(SIMDJSON_IMPLEMENTATION_FALLBACK "Include the fallback implementation" ON)
if(NOT SIMDJSON_IMPLEMENTATION_FALLBACK)
message(DEPRECATION "SIMDJSON_IMPLEMENTATION_FALLBACK is deprecated. Use SIMDJSON_IMPLEMENTATION=-fallback instead.")

View File

@ -25,7 +25,7 @@ An overview of what you need to know to use simdjson, with examples.
Requirements
------------------
- A recent compiler (LLVM clang6 or better, GNU GCC 7.4 or better) on a 64-bit (ARM or x64 Intel/AMD) POSIX systems such as macOS, freeBSD or Linux. We require that the compiler supports the C++11 standard or better.
- A recent compiler (LLVM clang6 or better, GNU GCC 7.4 or better) on a 64-bit (PPC, ARM or x64 Intel/AMD) POSIX system such as macOS, FreeBSD or Linux. We require that the compiler supports the C++11 standard or better.
- Visual Studio 2017 or better under 64-bit Windows. Users should target a 64-bit build (x64) instead of a 32-bit build (x86). We support the LLVM clang compiler under Visual Studio (clangcl) as well as the regular Visual Studio compiler. We also support MinGW 64-bit under Windows.
Including simdjson
@ -75,7 +75,7 @@ set(SIMDJSON_BUILD_STATIC ON CACHE INTERNAL "")
FetchContent_MakeAvailable(simdjson)
```
You should replace `GIT_TAG v0.5.0` by the version you need. If you omit `GIT_TAG v0.5.0`, you will work from the main branch of simdjson: we recommend that if you are working on production code,
You should replace `GIT_TAG v0.5.0` by the version you need. If you omit `GIT_TAG v0.5.0`, you will work from the main branch of simdjson: we recommend that if you are working on production code,
Elsewhere in your project, you can declare dependencies on simdjson with lines such as these:
@ -240,7 +240,7 @@ available, we define the macro `SIMDJSON_HAS_STRING_VIEW`.
When we detect that it is unavailable,
we use [string-view-lite](https://github.com/martinmoene/string-view-lite) as a
substitute. In such cases, we use the type alias `using string_view = nonstd::string_view;` to
substitute. In such cases, we use the type alias `using string_view = nonstd::string_view;` to
offer the same API, irrespective of the compiler and standard library. The macro
`SIMDJSON_HAS_STRING_VIEW` will be *undefined* to indicate that we emulate `string_view`.
@ -285,7 +285,7 @@ In some cases, you may have valid JSON strings that you do not wish to parse but
// It does not have to be null-terminated.
const char * some_string = "[ 1, 2, 3, 4] ";
size_t length = std::strlen(some_string);
// Create a buffer to receive the minified string. Make sure that there is enough room (length bytes).
// Create a buffer to receive the minified string. Make sure that there is enough room (length bytes).
std::unique_ptr<char[]> buffer{new char[length]};
size_t new_length{}; // It will receive the minified length.
auto error = simdjson::minify(some_string, length, buffer.get(), new_length);
@ -332,10 +332,10 @@ index allows you to select the indexed node. Within objects, the string value of
select the value. If your keys contain the characters '/' or '~', they must be escaped as '~1' and
'~0' respectively. An empty JSON Path refers to the whole document.
We also extend the JSON Pointer support to include *relative* paths.
We also extend the JSON Pointer support to include *relative* paths.
You can apply a JSON path to any node and the path gets interpreted relatively, as if the current node were a whole JSON document.
Consider the following example:
Consider the following example:
```c++
auto cars_json = R"( [
@ -605,7 +605,7 @@ for (dom::element doc : docs) {
// Prints 1 2 3
```
In-memory ndjson strings can be parsed as well, with `parser.parse_many(string)`:
In-memory ndjson strings can be parsed as well, with `parser.parse_many(string)`:
```c++
@ -622,7 +622,7 @@ for (dom::element doc : docs) {
Unlike `parser.parse`, both `parser.load_many(filename)` and `parser.parse_many(string)` may parse
"on demand" (lazily). That is, no parsing may have been done before you enter the loop
"on demand" (lazily). That is, no parsing may have been done before you enter the loop
`for (dom::element doc : docs) {` and you should expect the parser to only ever fully parse one JSON
document at a time.
@ -660,8 +660,8 @@ The simdjson library is fully compliant with the [RFC 8259](https://www.tbray.o
- The only insignificant whitespace characters allowed are the space, the horizontal tab, the line feed and the carriage return. In particular, a JSON document may not contain an unescaped null character.
- A single string or a single number is considered to be a valid JSON document.
- We fully validate the numbers according to the JSON specification. For example, the string `01` is not a valid JSON document since the specification states that *leading zeros are not allowed*.
- The specification allows implementations to set limits on the range and precision of numbers accepted. We support 64-bit floating-point numbers as well as integer values.
- We parse integers and floating-point numbers as separate types which allows us to support all signed (two's complement) 64-bit integers, like a Java `long` or a C/C++ `long long` and all 64-bit unsigned integers. When we cannot represent exactly an integer as a signed or unsigned 64-bit value, we reject the JSON document.
- The specification allows implementations to set limits on the range and precision of numbers accepted. We support 64-bit floating-point numbers as well as integer values.
- We parse integers and floating-point numbers as separate types which allows us to support all signed (two's complement) 64-bit integers, like a Java `long` or a C/C++ `long long` and all 64-bit unsigned integers. When we cannot represent exactly an integer as a signed or unsigned 64-bit value, we reject the JSON document.
- We support the full range of 64-bit floating-point numbers (binary64). The values range from `std::numeric_limits<double>::lowest()` to `std::numeric_limits<double>::max()`, so from -1.7976e308 all the way to 1.7976e308. Extreme values (less than or equal to -1e308, greater than or equal to 1e308) are rejected: we refuse to parse the input document. Numbers are parsed with perfect accuracy (ULP 0): the nearest floating-point value is chosen, rounding to even when needed. If you serialized your floating-point numbers with 17 significant digits in a standard-compliant manner, the simdjson library is guaranteed to recover the exact same numbers.
- The specification states that JSON text exchanged between systems that are not part of a closed ecosystem MUST be encoded using UTF-8. The simdjson library does full UTF-8 validation as part of the parsing. The specification states that implementations MUST NOT add a byte order mark: the simdjson library rejects documents starting with a byte order mark.
- The simdjson library validates string content for unescaped characters. Unescaped line breaks and tabs in strings are not allowed.

View File

@ -7,7 +7,7 @@ An overview of what you need to know to use simdjson, with examples.
Requirements
------------------
- A recent compiler (LLVM clang6 or better, GNU GCC 7 or better) on a 64-bit (ARM or x64 Intel/AMD) POSIX systems such as macOS, freeBSD or Linux. We require that the compiler supports the C++11 standard or better.
- A recent compiler (LLVM clang6 or better, GNU GCC 7 or better) on a 64-bit (PPC, ARM or x64 Intel/AMD) POSIX system such as macOS, FreeBSD or Linux. We require that the compiler supports the C++11 standard or better.
- Visual Studio 2017 or better under 64-bit Windows. Users should target a 64-bit build (x64) instead of a 32-bit build (x86). We support the LLVM clang compiler under Visual Studio (clangcl) as well as the regular Visual Studio compiler.
Including simdjson
@ -56,7 +56,7 @@ set(SIMDJSON_BUILD_STATIC ON CACHE INTERNAL "")
FetchContent_MakeAvailable(simdjson)
```
You should replace `GIT_TAG v0.5.0` by the version you need. If you omit `GIT_TAG v0.5.0`, you will work from the main branch of simdjson: we recommend that if you are working on production code,
You should replace `GIT_TAG v0.5.0` by the version you need. If you omit `GIT_TAG v0.5.0`, you will work from the main branch of simdjson: we recommend that if you are working on production code,
Elsewhere in your project, you can declare dependencies on simdjson with lines such as these:
@ -99,7 +99,7 @@ If you need to keep a document around long term, you can keep or move the parser
During the `load` or `parse` calls, neither the input file nor the input string are ever modified. After calling `load` or `parse`, the source (either a file or a string) can be safely discarded. All of the JSON data is stored in the `parser` instance. The parsed document is also immutable in simdjson: you do not modify it by accessing it.
For best performance, a `parser` instance should be reused over several files: otherwise you will needlessly reallocate memory, an expensive process. It is also possible to avoid entirely memory allocations during parsing when using simdjson.
For best performance, a `parser` instance should be reused over several files: otherwise you will needlessly reallocate memory, an expensive process. It is also possible to avoid entirely memory allocations during parsing when using simdjson.
If you need a lower-level interface, you may call the function `parser.parse(const char * p, size_t l)` on a pointer `p` while specifying the
@ -221,7 +221,7 @@ available, we define the macro `SIMDJSON_HAS_STRING_VIEW`.
When we detect that it is unavailable,
we use [string-view-lite](https://github.com/martinmoene/string-view-lite) as a
substitute. In such cases, we use the type alias `using string_view = nonstd::string_view;` to
substitute. In such cases, we use the type alias `using string_view = nonstd::string_view;` to
offer the same API, irrespective of the compiler and standard library. The macro
`SIMDJSON_HAS_STRING_VIEW` will be *undefined* to indicate that we emulate `string_view`.
@ -314,10 +314,10 @@ index allows you to select the indexed node. Within objects, the string value of
select the value. If your keys contain the characters '/' or '~', they must be escaped as '~1' and
'~0' respectively. An empty JSON Path refers to the whole document.
We also extend the JSON Pointer support to include *relative* paths.
We also extend the JSON Pointer support to include *relative* paths.
You can apply a JSON path to any node and the path gets interpreted relatively, as if the current node were a whole JSON document.
Consider the following example:
Consider the following example:
```c++
auto cars_json = R"( [
@ -587,7 +587,7 @@ for (dom::element doc : docs) {
```
In-memory ndjson strings can be parsed as well, with `parser.parse_many(string)`:
In-memory ndjson strings can be parsed as well, with `parser.parse_many(string)`:
```
@ -604,7 +604,7 @@ for (dom::element doc : docs) {
Unlike `parser.parse`, both `parser.load_many(filename)` and `parser.parse_many(string)` may parse
"on demand" (lazily). That is, no parsing may have been done before you enter the loop
"on demand" (lazily). That is, no parsing may have been done before you enter the loop
`for (dom::element doc : docs) {` and you should expect the parser to only ever fully parse one JSON
document at a time.

View File

@ -20,20 +20,22 @@ The current implementations are:
* haswell: AVX2 (2013 Intel Haswell or later)
* westmere: SSE4.2 (2010 Westmere or later).
* arm64: 64-bit ARMv8-A NEON
* ppc64: 64-bit POWER8 and POWER9 with VSX and ALTIVEC extensions. Both big-endian and little-endian are implemented; which one is used depends on the compiler you are using.
* fallback: A generic implementation that runs on any 64-bit processor.
In many cases, you don't know where your compiled binary is going to run, so simdjson automatically
compiles *all* the implementations into the executable. On Intel, it will include 3 implementations
(haswell, westmere and fallback), and on ARM it will include 2 (arm64 and fallback).
(haswell, westmere and fallback), on ARM it will include 2 (arm64 and fallback), and on PPC it will include 2 (ppc64 and fallback).
If you know more about where you're going to run and want to save the space, you can disable any of
these implementations at compile time with `-DSIMDJSON_IMPLEMENTATION_X=0` (where X is HASWELL,
WESTMERE, ARM64 and FALLBACK).
WESTMERE, ARM64, PPC64 and FALLBACK).
The simdjson library automatically sets header flags for each implementation as it compiles; there
is no need to set architecture-specific flags yourself (e.g., `-mavx2`, `/AVX2` or
`-march=haswell`), and it may even break runtime dispatch and your binaries will fail to run on
older processors.
older processors. _Note:_ for POWER9 processors make sure you compile it with `-mcpu=power9` and `-mtune=power9` to
get maximum performance.
Runtime CPU Detection
---------------------
@ -71,7 +73,7 @@ And look them up by name:
```c++
cout << simdjson::available_implementations["fallback"]->description() << endl;
```
Though the fallback implementation should always be available, others might be missing. When
Though the fallback implementation should always be available, others might be missing. When
an implementation is not available, the bracket call `simdjson::available_implementations[name]`
will return the null pointer.

View File

@ -5,12 +5,12 @@ A Better Way to Parse Documents?
Whether we parse JSON or XML, or any other serialized format, there are relatively few common strategies:
- The most established approach is the construction of document-object-model (DOM).
- Another established approach is an event-based approach (like SAX, SAJ).
- Another popular approach is the schema-based deserialization model.
- Another established approach is an event-based approach (like SAX, SAJ).
- Another popular approach is the schema-based deserialization model.
We propose an approach that is as easy to use and often as flexible as the DOM approach, yet as fast and
efficient as the schema-based or event-based approaches. We call this new approach "On Demand". The
simdjson On Demand API offers a familiar, friendly DOM API and
We propose an approach that is as easy to use and often as flexible as the DOM approach, yet as fast and
efficient as the schema-based or event-based approaches. We call this new approach "On Demand". The
simdjson On Demand API offers a familiar, friendly DOM API and
provides the performance of just-in-time parsing on top of the simdjson superior performance.
To achieve ease of use, we mimicked the *form* of a traditional DOM API: you can iterate over
@ -81,10 +81,10 @@ type, we avoid branch mispredictions related to data type determination and impr
We expect users of an On Demand API to work in terms of a JSON dialect, which is a set of expectations and
specifications that come in addition to the [JSON specification](https://www.rfc-editor.org/rfc/rfc8259.txt).
specifications that come in addition to the [JSON specification](https://www.rfc-editor.org/rfc/rfc8259.txt).
The On Demand approach is designed around several principles:
* **Streaming (\*):** It avoids preparsing values, keeping the memory usage and the latency down.
* **Streaming (\*):** It avoids preparsing values, keeping the memory usage and the latency down.
* **Forward-Only:** To prevent reiteration of the same values and to keep the number of variables down (literally), only a single index is maintained and everything uses it (even if you have nested for loops). This means when you are going through an array of arrays, for example, that the inner array loop will advance the index to the next comma, and the array can just pick it up and look at it.
* **Natural Iteration:** A JSON array or object can be iterated with a normal C++ for loop. Nested arrays and objects are supported by nested for loops.
* **Use-Specific Parsing:** Parsing is always specific to the type required by the programmer. For example, if the programmer asks for an unsigned integer, we just start parsing digits. If there were no digits, we toss an error. There are even different parsers for `double`, `uint64_t` and `int64_t` values. This use-specific parsing avoids the branchiness of a generic "type switch," and makes the code more inlineable and compact.
@ -98,9 +98,9 @@ approaches to parsing and parser APIs in use today.
### DOM Parsers
Many of the most usable, popular JSON APIs (including simdjson) deserialize into a **DOM**: an intermediate tree of
objects, arrays and values. In this model, we convert the input data all at once into a tree-like structure (the DOM).
The DOM is then accessed by the programmer like any other in-memory data structure. The resulting API lets
you refer to each array or object separately, using familiar techniques like iteration (`for (auto value : array)`)
objects, arrays and values. In this model, we convert the input data all at once into a tree-like structure (the DOM).
The DOM is then accessed by the programmer like any other in-memory data structure. The resulting API lets
you refer to each array or object separately, using familiar techniques like iteration (`for (auto value : array)`)
or indexing (`object["key"]`). In some cases, the values are even deserialized directly into familiar C++ constructs like vectors and
maps.
@ -144,14 +144,14 @@ of several gigabytes per second. However, in some instances, it may be possible
The event-based model (originally from the "Streaming API for XML") uses streaming to eliminate the cost of
parsing and storing the entire JSON. In the event-based model, a core JSON engine parses the JSON document
piece by piece, but instead of stuffing values in a DOM tree, it passes each value to a callback function,
letting the user decide for themselves how to handle it. In such a model, the programmer may need to provide functions
for all possible events (a number, a string, a new object, a new array, the array ends, the object ends, and so on).
This allows programmers to work with much larger files without running out of memory.
letting the user decide for themselves how to handle it. In such a model, the programmer may need to provide functions
for all possible events (a number, a string, a new object, a new array, the array ends, the object ends, and so on).
This allows programmers to work with much larger files without running out of memory.
The drawback is complexity: event-based APIs generally have you define a single callback for each type
(e.g. `string_field(std::string_view key, std::string_view value)`). Because of this, the programmer suffers
from context blindness: when they find a string they have to check where it is before they know what to
do with it. Is this string the text of the tweet, the screen name, or something else? Are we even in
do with it. Is this string the text of the tweet, the screen name, or something else? Are we even in
a tweet right now, or is this from some other place in the document
entirely? Though an event-based approach may allow superior performance, it is demanding of the programmer
who must efficiently keep track of its current state within the JSON input.
@ -196,7 +196,7 @@ sax::parser parser;
parser.parse(twitter_callbacks());
```
This is a large amount of code, requiring mental gymnastics even to read. An actual implementation is harder to write
This is a large amount of code, requiring mental gymnastics even to read. An actual implementation is harder to write
and to maintain.
@ -221,7 +221,7 @@ choice, as well as a parser to deserialize the JSON into those structs. Some suc
define your own data structures (`struct`) and they let a preprocessor inspect it and generate a custom JSON parser for it.
Not all of these schema-based parser generators generate a parser or even optimize for
streaming, but they are *able* to in principle. Unlike the DOM and the event-based models, a schema-based approach assumes
that the structure of the document is known at compile-time.
that the structure of the document is known at compile-time.
Pros of the schema-based approach:
@ -244,9 +244,9 @@ the parser does not. This means it has to look at each value blind with a big "s
statement, asking "is this a number? A string? A boolean? An array? An object?"
In modern processors, this kind of switch statement can make your program run slower
than it needs to because of the high cost of branch misprediction. Indeed, modern processor
cores rely on speculative execution for speed. They "read ahead" in your program, predicting
which instructions to run as soon as the data is available. A single-threaded program can
than it needs to because of the high cost of branch misprediction. Indeed, modern processor
cores rely on speculative execution for speed. They "read ahead" in your program, predicting
which instructions to run as soon as the data is available. A single-threaded program can
execute 2, 3 or even more instructions per cycle--largely because of speculative execution.
Unfortunately, when the processor mispredicts the instructions, typically due to a mispredicted
@ -258,7 +258,7 @@ Type blindness means that the processor has to guess, for every JSON value, whet
an object, number, string or boolean since these correspond to distinct code paths.
Though some JSON files have predictable content, we find in practice that many JSON files
stress the branch prediction. Though branch predictors improve with each new generation of processors,
the cost of branch mispredictions also tends to increase as pipelines expand, and the processors become
the cost of branch mispredictions also tends to increase as pipelines expand, and the processors become
able to schedule longer streams of instructions.
On Demand parsing is tailor-made to solve this problem at the source, parsing values only after the
@ -297,12 +297,12 @@ To help visualize the algorithm, we'll walk through the example C++ given at the
```c++
auto doc = parser.iterate(json);
```
Since this is the first time this parser has been used, `iterate()` first allocates the internal
parser buffers. When reusing an existing parser, allocation
only happens if the new document is bigger than internal buffers can handle. The On Demand
only happens if the new document is bigger than internal buffers can handle. The On Demand
API only ever allocates memory in the `iterate()` function call.
The simdjson library then preprocesses the JSON text at high speed, finding all tokens (i.e. the starting
position of any JSON value, as well as any important operators like `,`, `:`, `]` or `}`).
@ -320,7 +320,7 @@ To help visualize the algorithm, we'll walk through the example C++ given at the
NOTE: You should always have such a `document` instance (here `doc`) and it should remain in scope for the duration
of your parsing function. E.g., you should not use the returned document as a temporary (e.g., `auto x = parser.iterate(json).get_object();`)
followed by other operations as the destruction of the `document` instance makes all of the derived instances
followed by other operations as the destruction of the `document` instance makes all of the derived instances
ill-defined.
@ -357,16 +357,16 @@ To help visualize the algorithm, we'll walk through the example C++ given at the
}
```
What is not explained in this code expansion is *error chaining*.
Generally, you can use `document` methods on a `simdjson_result<...>` value; any errors will
Generally, you can use `document` methods on a `simdjson_result<...>` value; any errors will
just be passed down the chain. Many method calls
can be chained in this manner. So `for (object tweet : doc["statuses"])`, which is the equivalent of
`object tweet = *(doc.get_object()["statuses"].get_array().begin()).get_object()`, could fail in any of
`object tweet = *(doc.get_object()["statuses"].get_array().begin()).get_object()`, could fail in any of
6 method calls, and the error will only be checked at the end,
when you attempt to cast the final `simdjson_result<object>` to object. Upon casting, an exception is
thrown if there was an error.
NOTE: while the document can be queried once for a key as if it were an object, it is not an actual object
instance. If you need to treat it as an object (e.g., to query more than one key), you can cast it as
instance. If you need to treat it as an object (e.g., to query more than one key), you can cast it as
such `ondemand::object root_object = doc.get_object();`.
@ -403,7 +403,7 @@ To help visualize the algorithm, we'll walk through the example C++ given at the
`["screen_name"]` then converts to object, checking for `{`, and finds `"screen_name"`.
To convert the result to usable string (i.e., the screen name `lemire`), the characters are written to the document's
To convert the result to usable string (i.e., the screen name `lemire`), the characters are written to the document's
string buffer (after possibly unescaping them), which now has *two* string_views pointing into it, and looks like `first!\0lemire\0`.
Finally, the temporary user object is destroyed, causing it to skip the remainder of the object
@ -454,7 +454,7 @@ To help visualize the algorithm, we'll walk through the example C++ given at the
At the end of the loop, the `tweet` is first destroyed, skipping the remainder of the tweet
object (`}`).
The `iter++` instruction from `for (ondemand::object tweet : doc["statuses"])` then checks whether there are
more values and finds that there are none (`]`). It marks the array iteration as finished and the for
loop terminates.
@ -467,7 +467,7 @@ Design Features
### String Parsing
When the user requests strings, we unescape them to a single string buffer much like the DOM parser
so that users enjoy the same string performance as the core simdjson. We do not write the length to the
so that users enjoy the same string performance as the core simdjson. We do not write the length to the
string buffer, however; that is stored in the `string_view` instance we return to the user.
```C++
@ -489,11 +489,11 @@ case with `std::string`) but be mindful that the life cycle of these `string_vie
parser instance. If the parser instance is destroyed or reused for a new JSON document, these strings are no longer valid.
We iterate through object instances using `field` instances which represent key-value pairs. The value
is accessible by the `value()` method whereas the key is accessible by the `key()` method.
The keys are treated differently from values: they are made available as a special type `raw_json_string`
which is a lightweight type that is meant to be used on a temporary basis, almost solely for
is accessible by the `value()` method whereas the key is accessible by the `key()` method.
The keys are treated differently from values: they are made available as a special type `raw_json_string`
which is a lightweight type that is meant to be used on a temporary basis, almost solely for
direct raw ASCII comparisons (`field.key() == "mykey"`). If you occasionally need to access and store the
unescaped key values, you may use the `unescaped_key()` method. Once you have called `unescaped_key()` method,
unescaped key values, you may use the `unescaped_key()` method. Once you have called `unescaped_key()` method,
neither the `key()` nor the `unescaped_key()` methods should be called: the current field instance
no longer has a key (that is by design). Like other strings, the resulting `std::string_view` generated
from the `unescaped_key()` method has a lifecycle tied to the `parser` instance: once the parser
@ -512,7 +512,7 @@ for(auto field : doc.get_object()) {
The On Demand API is powerful. To compensate, we add some safeguards to ensure that it can be used without fear
in production systems:
- If the value fails to be parsed as one type, the program can try to parse it as something else until the program succeeds. Thus
- If the value fails to be parsed as one type, the program can try to parse it as something else until the program succeeds. Thus
the programmer can engineer fall back routines.
- If the value succeeds in being parsed or converted to a type, the program cannot try again. An attempt to parse the same node twice will
cause the program to abort. We put this safety measure in the API to prevent double iteration of an array which
@ -533,8 +533,8 @@ in production systems:
// parent owns the focus
ondemand::object c1 = parent["child1"];
// c1 owns the focus
//
if(std::string_view(c1["name"]) != "John") { ... }
//
if(std::string_view(c1["name"]) != "John") { ... }
// c2 attempts to grab the focus from parent but fails
ondemand::object c2 = parent["child2"];
// c2 is now in an unsafe state and the following line would be unsafe
@ -552,7 +552,7 @@ in production systems:
{
ondemand::object c1 = parent["child1"];
// c1 grabbed the focus from parent
if(std::string_view(c1["name"]) != "John") { return false; }
if(std::string_view(c1["name"]) != "John") { return false; }
}
// c1 went out of scope, so its destructor was called and the focus
// was handed back to parent.
@ -604,7 +604,7 @@ At this time we recommend the On Demand API in the following cases:
3. Speed and efficiency are of the utmost importance. Keep in mind that the core simdjson API is highly efficient so adopting the On Demand API is not necessary for high efficiency.
4. As a developer, you value a clean, flexible and maintainable API.
Good applications for the On Demand API might be:
Good applications for the On Demand API might be:
* You are working from pre-existing large JSON files that have been vetted. You expect them to be well formed according to a known JSON dialect and to have a consistent layout. For example, you might be doing biomedical research or machine learning on top of static data dumps in JSON.
* You have a closed system on predetermined hardware. Both the generation and the consumption of JSON data are within your system. Your team controls both the software that produces the JSON and the software that parses it, and your team knows and controls the hardware. Thus you can fully test your system.
@ -613,7 +613,7 @@ Good applications for the On Demand API might be:
## Checking Your CPU Selection
Given that the On Demand API does not offer runtime dispatching, your code is compiled against a specific CPU target. You should
verify that the code is compiled against the target you expect: `haswell` (AVX2 x64 processors), `westmere` (SSE4 x64 processors), `arm64` (64-bit ARM), `fallback` (others). Under x64 processors, many programmers will want to target `haswell` whereas under ARM,
verify that the code is compiled against the target you expect: `haswell` (AVX2 x64 processors), `westmere` (SSE4 x64 processors), `arm64` (64-bit ARM), `ppc64` (64-bit POWER), `fallback` (others). Under x64 processors, many programmers will want to target `haswell` whereas under ARM,
most programmers will want to target `arm64`. The `fallback` is probably only good for testing purposes, not for deployment.
```C++

View File

@ -157,7 +157,7 @@ Downclocking
SIMD instructions are the public transportation of computing. Instead of using 4 distinct instructions to add numbers, you can replace them with a single instruction that does the same work. Though the one instruction is slightly more expensive, the energy used per unit of work is much less with SIMD. If you can increase your speed using SIMD instructions (NEON, SSE, AVX), you should expect to reduce your power usage.
The SIMD instructions that simdjson relies upon (SSE and AVX under x64, NEON under ARM) are routinely part of runtime libraries (e.g., [Go](https://golang.org/src/runtime/memmove_amd64.s), [Glibc](https://github.com/ihtsae/glibc/commit/5f3d0b78e011d2a72f9e88b0e9ef5bc081d18f97), [LLVM](https://github.com/llvm/llvm-project/blob/96f3ea0d21b48ca088355db10d4d1a2e9bc9f884/lldb/tools/debugserver/source/MacOSX/i386/DNBArchImplI386.cpp), [Rust](https://github.com/rust-lang/rust/commit/070fad1701fb36b112853b0a6a9787a7bb7ff34c), [Java](http://hg.openjdk.java.net/jdk8u/jdk8u/hotspot/file/c1374141598c/src/cpu/x86/vm/stubGenerator_x86_64.cpp#l1297), [PHP](https://github.com/php/php-src/blob/e5cb53ec68603d4dbdd780fd3ecfca943b4fd383/ext/standard/string.c)). What distinguishes the simdjson library is that it is built from the ground up to benefit from these instructions.
The SIMD instructions that simdjson relies upon (SSE and AVX under x64, NEON under ARM, ALTIVEC under PPC) are routinely part of runtime libraries (e.g., [Go](https://golang.org/src/runtime/memmove_amd64.s), [Glibc](https://github.com/ihtsae/glibc/commit/5f3d0b78e011d2a72f9e88b0e9ef5bc081d18f97), [LLVM](https://github.com/llvm/llvm-project/blob/96f3ea0d21b48ca088355db10d4d1a2e9bc9f884/lldb/tools/debugserver/source/MacOSX/i386/DNBArchImplI386.cpp), [Rust](https://github.com/rust-lang/rust/commit/070fad1701fb36b112853b0a6a9787a7bb7ff34c), [Java](http://hg.openjdk.java.net/jdk8u/jdk8u/hotspot/file/c1374141598c/src/cpu/x86/vm/stubGenerator_x86_64.cpp#l1297), [PHP](https://github.com/php/php-src/blob/e5cb53ec68603d4dbdd780fd3ecfca943b4fd383/ext/standard/string.c)). What distinguishes the simdjson library is that it is built from the ground up to benefit from these instructions.
You should not expect the simdjson library to cause *downclocking* of your recent Intel CPU cores.
@ -167,7 +167,7 @@ On some Intel processors, using SIMD instructions in a sustained manner on the s
- [Whenever 512-bit AVX-512 instructions are used](https://lemire.me/blog/2018/09/07/avx-512-when-and-how-to-use-these-new-instructions/).
- Whenever heavy 256-bit or wider instructions are used. Heavy instructions are those involving floating point operations or integer multiplications (since these execute on the floating point unit).
The simdjson library does not currently support AVX-512 instructions and it does not make use of heavy 256-bit instructions. We do use vectorized multiplications, but only using 128-bit registers. Thus there should be no downclocking due to simdjson on recent processors.
The simdjson library does not currently support AVX-512 instructions and it does not make use of heavy 256-bit instructions. We do use vectorized multiplications, but only using 128-bit registers. Thus there should be no downclocking due to simdjson on recent processors.
You may still be worried about which SIMD instruction set is used by simdjson. Thankfully, [you can always determine and change which architecture-specific implementation is used](implementation-selection.md) by simdjson. Thus even if your CPU supports AVX2, you do not need to use AVX2. You are in control.

View File

@ -5,7 +5,7 @@
* @mainpage
*
* Check the [README.md](https://github.com/lemire/simdjson/blob/master/README.md#simdjson--parsing-gigabytes-of-json-per-second).
*
*
* Sample code. See https://github.com/simdjson/simdjson/blob/master/doc/basics.md for more examples.
#include "simdjson.h"
@ -21,7 +21,7 @@
{ "12345" : {"a":12.34, "b":56.78, "c": 9998877} },
{ "12545" : {"a":11.44, "b":12.78, "c": 11111111} }
] )"_padded;
for (simdjson::dom::object obj : parser.parse(abstract_json)) {
for(const auto& key_value : obj) {
cout << "key: " << key_value.key << " : ";
@ -85,6 +85,7 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
#include "simdjson/arm64.h"
#include "simdjson/haswell.h"
#include "simdjson/westmere.h"
#include "simdjson/ppc64.h"
#include "simdjson/fallback.h"
#include "simdjson/builtin.h"

View File

@ -10,6 +10,8 @@
#define SIMDJSON_BUILTIN_IMPLEMENTATION westmere
#elif SIMDJSON_CAN_ALWAYS_RUN_ARM64
#define SIMDJSON_BUILTIN_IMPLEMENTATION arm64
#elif SIMDJSON_CAN_ALWAYS_RUN_PPC64
#define SIMDJSON_BUILTIN_IMPLEMENTATION ppc64
#elif SIMDJSON_CAN_ALWAYS_RUN_FALLBACK
#define SIMDJSON_BUILTIN_IMPLEMENTATION fallback
#else
@ -21,7 +23,7 @@ namespace simdjson {
/**
* Represents the best statically linked simdjson implementation that can be used by the compiling
* program.
*
*
* Detects what options the program is compiled against, and picks the minimum implementation that
* will work on any computer that can run the program. For example, if you compile with g++
* -march=westmere, it will pick the westmere implementation. The haswell implementation will

View File

@ -5,7 +5,7 @@
// Default Fallback to on unless a builtin implementation has already been selected.
#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK
#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 // (!SIMDJSON_CAN_ALWAYS_RUN_ARM64 && !SIMDJSON_CAN_ALWAYS_RUN_HASWELL && !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE)
#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 // (!SIMDJSON_CAN_ALWAYS_RUN_ARM64 && !SIMDJSON_CAN_ALWAYS_RUN_HASWELL && !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE && !SIMDJSON_CAN_ALWAYS_RUN_PPC64)
#endif
#define SIMDJSON_CAN_ALWAYS_RUN_FALLBACK SIMDJSON_IMPLEMENTATION_FALLBACK

View File

@ -65,10 +65,17 @@ enum instruction_set {
SSE42 = 0x8,
PCLMULQDQ = 0x10,
BMI1 = 0x20,
BMI2 = 0x40
BMI2 = 0x40,
ALTIVEC = 0x80
};
#if defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64
#if defined(__PPC64__)
// On PPC64 builds no runtime probing is performed: the ALTIVEC flag is
// reported unconditionally as the supported instruction set.
static inline uint32_t detect_supported_architectures() {
  return instruction_set::ALTIVEC;
}
#elif defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64
#if defined(__ARM_NEON)

View File

@ -7,7 +7,7 @@
#include <cfloat>
#include <cassert>
#ifndef _WIN32
// strcasecmp, strncasecmp
// strcasecmp, strncasecmp
#include <strings.h>
#endif
@ -17,7 +17,7 @@
* We want to differentiate carefully between
* clang under visual studio and regular visual
* studio.
*
*
* Under clang for Windows, we enable:
* * target pragmas so that part and only part of the
* code gets compiled for advanced instructions.
@ -43,7 +43,9 @@
#define SIMDJSON_IS_X86_64 1
#elif defined(__aarch64__) || defined(_M_ARM64)
#define SIMDJSON_IS_ARM64 1
#else
#elif defined(__PPC64__) || defined(_M_PPC64)
#define SIMDJSON_IS_PPC64 1
#else
#define SIMDJSON_IS_32BITS 1
// We do not support 32-bit platforms, but it can be
@ -52,6 +54,8 @@
#define SIMDJSON_IS_X86_32BITS 1
#elif defined(__arm__) || defined(_M_ARM)
#define SIMDJSON_IS_ARM_32BITS 1
#elif defined(__PPC__) || defined(_M_PPC)
#define SIMDJSON_IS_PPC_32BITS 1
#endif
#endif // defined(__x86_64__) || defined(_M_AMD64)
@ -61,7 +65,7 @@
for 64-bit processors and it seems that you are not \
compiling for a known 64-bit platform. All fast kernels \
will be disabled and performance may be poor. Please \
use a 64-bit target such as x64 or 64-bit ARM.")
use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.")
#endif // SIMDJSON_IS_32BITS
// this is almost standard?
@ -72,12 +76,12 @@ use a 64-bit target such as x64 or 64-bit ARM.")
// Our fast kernels require 64-bit systems.
//
// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions.
// Furthermore, the number of SIMD registers is reduced.
// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions.
// Furthermore, the number of SIMD registers is reduced.
//
// On 32-bit ARM, we would have smaller registers.
//
// The simdjson users should still have the fallback kernel. It is
// The simdjson users should still have the fallback kernel. It is
// slower, but it should run everywhere.
//

49
include/simdjson/ppc64.h Normal file
View File

@ -0,0 +1,49 @@
#ifndef SIMDJSON_PPC64_H
#define SIMDJSON_PPC64_H
#ifdef SIMDJSON_FALLBACK_H
#error "ppc64.h must be included before fallback.h"
#endif
#include "simdjson/portability.h"
#include "simdjson/internal/isadetection.h"
#include "simdjson/internal/jsoncharutils_tables.h"
#include "simdjson/internal/numberparsing_tables.h"
#include "simdjson/internal/simdprune_tables.h"
#if SIMDJSON_IMPLEMENTATION_PPC64
namespace simdjson {
/**
* Implementation for ALTIVEC (PPC64).
*/
namespace ppc64 {
} // namespace ppc64
} // namespace simdjson
#include "simdjson/ppc64/implementation.h"
#include "simdjson/ppc64/begin.h"
// Declarations
#include "simdjson/generic/dom_parser_implementation.h"
#include "simdjson/ppc64/intrinsics.h"
#include "simdjson/ppc64/bitmanipulation.h"
#include "simdjson/ppc64/bitmask.h"
#include "simdjson/ppc64/simd.h"
#include "simdjson/generic/jsoncharutils.h"
#include "simdjson/generic/atomparsing.h"
#include "simdjson/ppc64/stringparsing.h"
#include "simdjson/ppc64/numberparsing.h"
#include "simdjson/generic/implementation_simdjson_result_base.h"
#include "simdjson/generic/ondemand.h"
// Inline definitions
#include "simdjson/generic/implementation_simdjson_result_base-inl.h"
#include "simdjson/generic/ondemand-inl.h"
#include "simdjson/ppc64/end.h"
#endif // SIMDJSON_IMPLEMENTATION_PPC64
#endif // SIMDJSON_PPC64_H

View File

@ -0,0 +1 @@
#define SIMDJSON_IMPLEMENTATION ppc64

View File

@ -0,0 +1,70 @@
#ifndef SIMDJSON_PPC64_BITMANIPULATION_H
#define SIMDJSON_PPC64_BITMANIPULATION_H
namespace simdjson {
namespace SIMDJSON_IMPLEMENTATION {
namespace {
// Returns the number of trailing zero bits of input_num.
// This function is occasionally invoked with input_num == 0; the algorithms
// never consume the value returned in that case, but the sanitizers cannot
// prove it, hence the NO_SANITIZE_UNDEFINED annotation.
NO_SANITIZE_UNDEFINED
simdjson_really_inline int trailing_zeroes(uint64_t input_num) {
#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO
  return __builtin_ctzll(input_num);
#else // SIMDJSON_REGULAR_VISUAL_STUDIO
  // Scan from the least significant bit (LSB) towards the most significant
  // bit (MSB) and report the position of the first set bit (1).
  unsigned long bit_index;
  _BitScanForward64(&bit_index, input_num);
  return (int)bit_index;
#endif // SIMDJSON_REGULAR_VISUAL_STUDIO
}
/* Clears the lowest set bit of input_num and returns the result.
 * The arithmetic is unsigned, so a zero input is well defined here:
 * 0 - 1 wraps around and the bitwise AND with 0 yields 0. */
simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) {
return input_num & (input_num - 1);
}
/* Returns the number of leading zero bits of input_num.
 * With the compiler builtin the result might be undefined when input_num is
 * zero; the Visual Studio path returns 64 in that case. */
simdjson_really_inline int leading_zeroes(uint64_t input_num) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
  // _BitScanReverse64 searches from the most significant bit (MSB) down to
  // the least significant bit (LSB) and reports whether a set bit was found.
  unsigned long msb_index = 0;
  return _BitScanReverse64(&msb_index, input_num) ? (int)(63 - msb_index)
                                                  : 64;
#else
  return __builtin_clzll(input_num);
#endif // SIMDJSON_REGULAR_VISUAL_STUDIO
}
// Population count: returns how many bits of input_num are set to 1.
simdjson_really_inline int count_ones(uint64_t input_num) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
  // note: we do not support legacy 32-bit Windows, so the 64-bit intrinsic
  // is used; Visual Studio wants two leading underscores.
  return __popcnt64(input_num);
#else
  return __builtin_popcountll(input_num);
#endif
}
// Adds value1 and value2, stores the (wrapped) 64-bit sum in *result and
// returns true if the addition overflowed, false otherwise.
simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2,
                                         uint64_t *result) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
  // Unsigned addition wraps around; the sum is smaller than an operand
  // exactly when a carry out of bit 63 occurred.
  *result = value1 + value2;
  return *result < value1;
#else
  // uint64_t may be `unsigned long` rather than `unsigned long long`, so the
  // builtin writes to a correctly typed temporary instead of going through a
  // pointer cast (which would access *result via an incompatible type).
  unsigned long long sum;
  const bool overflowed = __builtin_uaddll_overflow(value1, value2, &sum);
  *result = sum;
  return overflowed;
#endif
}
} // unnamed namespace
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace simdjson
#endif // SIMDJSON_PPC64_BITMANIPULATION_H

View File

@ -0,0 +1,42 @@
#ifndef SIMDJSON_PPC64_BITMASK_H
#define SIMDJSON_PPC64_BITMASK_H
namespace simdjson {
namespace SIMDJSON_IMPLEMENTATION {
namespace {
//
// Computes the "cumulative bitwise xor" of bitmask: output bit i is the xor
// of input bits 0..i, so the output flips each time a 1 is encountered.
//
// For example, prefix_xor(00100100) == 00011100
//
simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) {
  // A vec_pmsum_be based version (kept below for reference) is possible, but
  // gcc sometimes miscompiles vec_pmsum_be, somewhere between its 8th and 9th
  // versions, and the performance boost was not noticeable. Hence the usual
  // shift-and-xor implementation is used instead.
  // __vector unsigned long long all_ones = {~0ull, ~0ull};
  // __vector unsigned long long mask = {bitmask, 0};
  // // Clang and GCC return different values for pmsum for ull so cast it to one.
  // // Generally it is not specified by ALTIVEC ISA what is returned by
  // // vec_pmsum_be.
  // #if defined(__LITTLE_ENDIAN__)
  // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]);
  // #else
  // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]);
  // #endif

  // Doubling shifts: 1, 2, 4, 8, 16, 32.
  for (int shift = 1; shift < 64; shift <<= 1) {
    bitmask ^= bitmask << shift;
  }
  return bitmask;
}
} // unnamed namespace
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace simdjson
#endif

View File

@ -0,0 +1 @@
#undef SIMDJSON_IMPLEMENTATION

View File

@ -0,0 +1,34 @@
#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H
#define SIMDJSON_PPC64_IMPLEMENTATION_H
#include "simdjson.h"
#include "simdjson/internal/isadetection.h"
namespace simdjson {
namespace ppc64 {
namespace {
using namespace simdjson;
using namespace simdjson::dom;
} // namespace
// The ppc64 implementation object. It registers itself with the base
// simdjson::implementation under the name "ppc64", the description
// "PPC64 ALTIVEC" and the ALTIVEC instruction-set flag. The member functions
// below are only declared here; their definitions live elsewhere.
class implementation final : public simdjson::implementation {
public:
simdjson_really_inline implementation()
: simdjson::implementation("ppc64", "PPC64 ALTIVEC",
internal::instruction_set::ALTIVEC) {}
// Creates a DOM parser implementation and hands it back through dst.
// NOTE(review): capacity/max_length are presumably the document capacity and
// maximum depth/length limits -- confirm against the base class declaration.
simdjson_warn_unused error_code create_dom_parser_implementation(
size_t capacity, size_t max_length,
std::unique_ptr<internal::dom_parser_implementation> &dst)
const noexcept final;
// Minifies the len bytes at buf into dst, reporting the output size in
// dst_len.
simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len,
uint8_t *dst,
size_t &dst_len) const noexcept final;
// Returns whether the len bytes at buf are valid UTF-8.
simdjson_warn_unused bool validate_utf8(const char *buf,
size_t len) const noexcept final;
};
} // namespace ppc64
} // namespace simdjson
#endif // SIMDJSON_PPC64_IMPLEMENTATION_H

View File

@ -0,0 +1,19 @@
#ifndef SIMDJSON_PPC64_INTRINSICS_H
#define SIMDJSON_PPC64_INTRINSICS_H
#include "simdjson.h"
// altivec.h declares the AltiVec/VSX vector intrinsics (vec_*) used by the
// ppc64 kernels.
#include <altivec.h>
// These are defined by altivec.h in GCC toolchain, it is safe to undef them.
#ifdef bool
#undef bool
#endif
#ifdef vector
#undef vector
#endif
#endif // SIMDJSON_PPC64_INTRINSICS_H

View File

@ -0,0 +1,32 @@
#ifndef SIMDJSON_PPC64_NUMBERPARSING_H
#define SIMDJSON_PPC64_NUMBERPARSING_H
#include <byteswap.h>
namespace simdjson {
namespace SIMDJSON_IMPLEMENTATION {
namespace {
// Parses eight consecutive ASCII digits starting at chars into their numeric
// value. No suitable vector instruction is used here, so this is a scalar
// (SWAR) routine working on one 64-bit word.
// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
static simdjson_really_inline uint32_t
parse_eight_digits_unrolled(const uint8_t *chars) {
  uint64_t digits;
  memcpy(&digits, chars, sizeof(digits));
#ifdef __BIG_ENDIAN__
  // The arithmetic below expects the first character in the lowest byte.
  digits = bswap_64(digits);
#endif
  // 2561 == 10 * 2^8 + 1: combine adjacent digits into two-digit values.
  const uint64_t pairs = ((digits & 0x0F0F0F0F0F0F0F0F) * 2561) >> 8;
  // 6553601 == 100 * 2^16 + 1: combine adjacent pairs into four-digit values.
  const uint64_t quads = ((pairs & 0x00FF00FF00FF00FF) * 6553601) >> 16;
  // 42949672960001 == 10000 * 2^32 + 1: combine both halves into the result.
  const uint64_t total = ((quads & 0x0000FFFF0000FFFF) * 42949672960001) >> 32;
  return uint32_t(total);
}
} // unnamed namespace
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace simdjson
#define SWAR_NUMBER_PARSING
#include "simdjson/generic/numberparsing.h"
#endif // SIMDJSON_PPC64_NUMBERPARSING_H

View File

@ -0,0 +1,471 @@
#ifndef SIMDJSON_PPC64_SIMD_H
#define SIMDJSON_PPC64_SIMD_H
#include "simdjson.h"
#include "simdjson/internal/simdprune_tables.h"
#include "simdjson/ppc64/bitmanipulation.h"
#include <type_traits>
namespace simdjson {
namespace SIMDJSON_IMPLEMENTATION {
namespace {
namespace simd {
using __m128i = __vector unsigned char;
// Common base of the SIMD wrapper types. It stores one __m128i vector and
// implements the bitwise operators in terms of the AltiVec intrinsics.
// Child is the derived wrapper type: operators return Child, and the compound
// assignments cast `this` to Child* to reuse the binary operators.
template <typename Child> struct base {
__m128i value;
// Zero constructor
simdjson_really_inline base() : value{__m128i()} {}
// Conversion from SIMD register
simdjson_really_inline base(const __m128i _value) : value(_value) {}
// Conversion to SIMD register
simdjson_really_inline operator const __m128i &() const {
return this->value;
}
simdjson_really_inline operator __m128i &() { return this->value; }
// Bit operations
// Bitwise OR of this vector with other.
simdjson_really_inline Child operator|(const Child other) const {
return vec_or(this->value, (__m128i)other);
}
// Bitwise AND of this vector with other.
simdjson_really_inline Child operator&(const Child other) const {
return vec_and(this->value, (__m128i)other);
}
// Bitwise XOR of this vector with other.
simdjson_really_inline Child operator^(const Child other) const {
return vec_xor(this->value, (__m128i)other);
}
// AND with complement via vec_andc: this->value & ~other.
simdjson_really_inline Child bit_andnot(const Child other) const {
return vec_andc(this->value, (__m128i)other);
}
// In-place variants: compute through the binary operator on the derived
// type, store the result back and return a reference to the derived object.
simdjson_really_inline Child &operator|=(const Child other) {
auto this_cast = (Child *)this;
*this_cast = *this_cast | other;
return *this_cast;
}
simdjson_really_inline Child &operator&=(const Child other) {
auto this_cast = (Child *)this;
*this_cast = *this_cast & other;
return *this_cast;
}
simdjson_really_inline Child &operator^=(const Child other) {
auto this_cast = (Child *)this;
*this_cast = *this_cast ^ other;
return *this_cast;
}
};
// Forward-declared so they can be used by splat and friends.
template <typename T> struct simd8;
// Base of the byte-lane vector types simd8<T>. Mask is the type returned by
// comparisons (simd8<bool> by default).
template <typename T, typename Mask = simd8<bool>>
struct base8 : base<simd8<T>> {
// Integer types used for bitmasks derived from one vector (bitmask_t) and,
// presumably, from two vectors (bitmask2_t) -- TODO confirm usage.
typedef uint16_t bitmask_t;
typedef uint32_t bitmask2_t;
simdjson_really_inline base8() : base<simd8<T>>() {}
simdjson_really_inline base8(const __m128i _value) : base<simd8<T>>(_value) {}
// Lane-wise equality comparison (vec_cmpeq); the result is returned as Mask.
simdjson_really_inline Mask operator==(const simd8<T> other) const {
return (__m128i)vec_cmpeq(this->value, (__m128i)other);
}
// Size in bytes of the underlying vector value.
static const int SIZE = sizeof(base<simd8<T>>::value);
// Combines prev_chunk and this vector with vec_sld using a shift of 16 - N
// bytes. On little-endian targets both inputs are reversed with vec_reve
// before the shift and the result is reversed back afterwards.
// NOTE(review): the intent appears to be "this chunk shifted by N bytes with
// the last N bytes of prev_chunk shifted in" -- confirm the lane order on
// little-endian targets.
template <int N = 1>
simdjson_really_inline simd8<T> prev(simd8<T> prev_chunk) const {
__m128i chunk = this->value;
#ifdef __LITTLE_ENDIAN__
chunk = (__m128i)vec_reve(this->value);
prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk);
#endif
chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N);
#ifdef __LITTLE_ENDIAN__
chunk = (__m128i)vec_reve((__m128i)chunk);
#endif
return chunk;
}
};
// SIMD byte mask type (returned by things like eq and gt)
template <> struct simd8<bool> : base8<bool> {
// Broadcasts _value to every byte lane: -(!!_value) converted to
// unsigned char is 0xFF for true and 0x00 for false.
static simdjson_really_inline simd8<bool> splat(bool _value) {
return (__m128i)vec_splats((unsigned char)(-(!!_value)));
}
simdjson_really_inline simd8<bool>() : base8() {}
simdjson_really_inline simd8<bool>(const __m128i _value)
: base8<bool>(_value) {}
// Splat constructor
simdjson_really_inline simd8<bool>(bool _value)
: base8<bool>(splat(_value)) {}
// Collapses the mask into an integer bitmask. vec_vbpermq gathers one bit per
// byte lane (bit indices 0x00, 0x08, ..., 0x78 as listed in perm_mask); the
// gathered bits are read from doubleword element 1 on little-endian targets
// and element 0 otherwise.
simdjson_really_inline int to_bitmask() const {
__vector unsigned long long result;
const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40,
0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00};
result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value,
(__m128i)perm_mask));
#ifdef __LITTLE_ENDIAN__
return static_cast<int>(result[1]);
#else
return static_cast<int>(result[0]);
#endif
}
// True unless every lane compares equal to zero.
simdjson_really_inline bool any() const {
return !vec_all_eq(this->value, (__m128i)vec_splats(0));
}
// Bitwise complement, computed as XOR with an all-ones mask.
simdjson_really_inline simd8<bool> operator~() const {
return this->value ^ (__m128i)splat(true);
}
};
// Shared implementation for the numeric 16-lane byte vectors (the int8_t and
// uint8_t specializations of simd8 derive from it). It provides construction
// helpers, load/store, element-wise add/subtract, 16-entry table lookup and
// mask-driven compression. T is the lane type.
template <typename T> struct base8_numeric : base8<T> {
// Build a vector from vec_splats(value), i.e. with `value` in every lane.
static simdjson_really_inline simd8<T> splat(T value) {
// NOTE(review): this cast looks redundant because `value` is used on the next
// line; presumably it silences a compiler warning - confirm before removing.
(void)value;
return (__m128i)vec_splats(value);
}
// Vector produced by splat(0).
static simdjson_really_inline simd8<T> zero() { return splat(0); }
// Load 16 lanes starting at `values`; the pointer is reinterpreted as a byte
// pointer for vec_vsx_ld.
static simdjson_really_inline simd8<T> load(const T values[16]) {
return (__m128i)(vec_vsx_ld(0, (const uint8_t *)values));
}
// Repeat 16 values as many times as necessary (usually for lookup tables).
// A register holds exactly 16 lanes here, so this simply forwards to the
// 16-argument simd8<T> constructor.
static simdjson_really_inline simd8<T> repeat_16(T v0, T v1, T v2, T v3, T v4,
T v5, T v6, T v7, T v8, T v9,
T v10, T v11, T v12, T v13,
T v14, T v15) {
return simd8<T>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
v14, v15);
}
// Default construction defers to base8<T>.
simdjson_really_inline base8_numeric() : base8<T>() {}
// Wrap an existing raw vector.
simdjson_really_inline base8_numeric(const __m128i _value)
: base8<T>(_value) {}
// Store to array: writes the whole vector to `dst` with vec_vsx_st.
simdjson_really_inline void store(T dst[16]) const {
vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst));
}
// Override to distinguish from bool version: bitwise NOT expressed as an XOR
// with 0xFF.
simdjson_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
// Addition/subtraction are the same for signed and unsigned, so both are
// implemented once here with the vector + and - operators on __m128i.
simdjson_really_inline simd8<T> operator+(const simd8<T> other) const {
return (__m128i)((__m128i)this->value + (__m128i)other);
}
simdjson_really_inline simd8<T> operator-(const simd8<T> other) const {
return (__m128i)((__m128i)this->value - (__m128i)other);
}
// Compound forms: compute through operator+ / operator- and return *this
// viewed as the derived simd8<T>.
simdjson_really_inline simd8<T> &operator+=(const simd8<T> other) {
*this = *this + other;
return *(simd8<T> *)this;
}
simdjson_really_inline simd8<T> &operator-=(const simd8<T> other) {
*this = *this - other;
return *(simd8<T> *)this;
}
// Perform a lookup assuming every lane of this vector is an index between 0
// and 15 (callers must not rely on the result for out of range values).
// The table is passed as both inputs of vec_perm and this vector supplies the
// indexes.
template <typename L>
simdjson_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value);
}
// Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted
// as a bitset). Passing a 0 value for mask would be equivalent to writing out
// every byte to output. Only the first 16 - count_ones(mask) bytes of the
// result are significant but 16 bytes get written. Design consideration: it
// seems like a function with the signature simd8<L> compress(uint32_t mask)
// would be sensible, but the AVX ISA makes this kind of approach difficult.
// NOTE(review): the AVX remark looks inherited from an x86 kernel - confirm it
// is still the relevant rationale on PPC64.
template <typename L>
simdjson_really_inline void compress(uint16_t mask, L *output) const {
using internal::BitsSetTable256mul2;
using internal::pshufb_combine_table;
using internal::thintable_epi8;
// this particular implementation was inspired by work done by @animetosho
// we do it in two steps, first 8 bytes and then second 8 bytes
uint8_t mask1 = uint8_t(mask); // least significant 8 bits
uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits
// next line just loads the 64-bit values thintable_epi8[mask1] and
// thintable_epi8[mask2] into a 128-bit register, using only
// two instructions on most compilers.
// On big-endian targets the two entries are placed in the opposite order and
// the vector is then passed through vec_reve.
#ifdef __LITTLE_ENDIAN__
__m128i shufmask = (__m128i)(__vector unsigned long long){
thintable_epi8[mask1], thintable_epi8[mask2]};
#else
__m128i shufmask = (__m128i)(__vector unsigned long long){
thintable_epi8[mask2], thintable_epi8[mask1]};
shufmask = (__m128i)vec_reve((__m128i)shufmask);
#endif
// we increment by 0x08 the second half of the mask, so that its indexes
// address bytes 8..15 of the source instead of bytes 0..7
shufmask = ((__m128i)shufmask) +
((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808});
// this is the version "nearly pruned": each 8-byte half has been shuffled
// on its own
__m128i pruned = vec_perm(this->value, this->value, shufmask);
// we still need to put the two halves together.
// we compute the popcount of the first half:
// (presumably already multiplied by two, going by the table's "mul2" name -
// TODO confirm against the table definition)
int pop1 = BitsSetTable256mul2[mask1];
// then load the corresponding mask, what it does is to write
// only the first pop1 bytes from the first 8 bytes, and then
// it fills in with the bytes from the second 8 bytes + some filling
// at the end. pop1 * 8 is the byte offset of that mask inside
// pshufb_combine_table.
__m128i compactmask =
vec_vsx_ld(0, (const uint8_t *)(pshufb_combine_table + pop1 * 8));
// The second vec_perm input is all zeros, which is where the filler bytes
// can be taken from.
__m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask);
// Always writes a full 16-byte vector to output.
vec_vsx_st(answer, 0, (__m128i *)(output));
}
// Convenience overload of lookup_16: builds the table from 16 scalars with
// simd8<L>::repeat_16 and forwards to the vector version.
template <typename L>
simdjson_really_inline simd8<L>
lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4,
L replace5, L replace6, L replace7, L replace8, L replace9,
L replace10, L replace11, L replace12, L replace13, L replace14,
L replace15) const {
return lookup_16(simd8<L>::repeat_16(
replace0, replace1, replace2, replace3, replace4, replace5, replace6,
replace7, replace8, replace9, replace10, replace11, replace12,
replace13, replace14, replace15));
}
};
// Signed bytes: 16 lanes of int8_t. Adds the constructors and the
// order-sensitive (signed) operations on top of base8_numeric<int8_t>.
template <> struct simd8<int8_t> : base8_numeric<int8_t> {
  // Default construction defers to the base class.
  simdjson_really_inline simd8() : base8_numeric<int8_t>() {}
  // Wrap an existing raw vector.
  simdjson_really_inline simd8(const __m128i _value)
      : base8_numeric<int8_t>(_value) {}
  // Splat constructor: every lane receives _value.
  simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
  // Array constructor: reads the lanes from memory through load().
  simdjson_really_inline simd8(const int8_t *values) : simd8(load(values)) {}
  // Member-by-member initialization, v0 first and v15 last.
  simdjson_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3,
                               int8_t v4, int8_t v5, int8_t v6, int8_t v7,
                               int8_t v8, int8_t v9, int8_t v10, int8_t v11,
                               int8_t v12, int8_t v13, int8_t v14, int8_t v15)
      : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7,
                                              v8, v9, v10, v11, v12, v13, v14,
                                              v15}) {}

  // Repeat 16 values as many times as necessary (usually for lookup tables).
  // Sixteen values fill the vector exactly once here.
  simdjson_really_inline static simd8<int8_t>
  repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5,
            int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11,
            int8_t v12, int8_t v13, int8_t v14, int8_t v15) {
    const simd8<int8_t> repeated(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
                                 v11, v12, v13, v14, v15);
    return repeated;
  }

  // Order-sensitive comparisons. Both operands are reinterpreted as signed
  // char vectors so that the intrinsics use signed ordering.
  simdjson_really_inline simd8<int8_t>
  max_val(const simd8<int8_t> other) const {
    const __vector signed char lhs = (__vector signed char)this->value;
    const __vector signed char rhs = (__vector signed char)(__m128i)other;
    return (__m128i)vec_max(lhs, rhs);
  }
  simdjson_really_inline simd8<int8_t>
  min_val(const simd8<int8_t> other) const {
    const __vector signed char lhs = (__vector signed char)this->value;
    const __vector signed char rhs = (__vector signed char)(__m128i)other;
    return (__m128i)vec_min(lhs, rhs);
  }
  // Lane-wise this > other, returned as a simd8<bool> mask.
  simdjson_really_inline simd8<bool>
  operator>(const simd8<int8_t> other) const {
    const __vector signed char lhs = (__vector signed char)this->value;
    const __vector signed char rhs = (__vector signed char)(__m128i)other;
    return (__m128i)vec_cmpgt(lhs, rhs);
  }
  // Lane-wise this < other, returned as a simd8<bool> mask.
  simdjson_really_inline simd8<bool>
  operator<(const simd8<int8_t> other) const {
    const __vector signed char lhs = (__vector signed char)this->value;
    const __vector signed char rhs = (__vector signed char)(__m128i)other;
    return (__m128i)vec_cmplt(lhs, rhs);
  }
};
// Unsigned bytes: 16 lanes of uint8_t. Adds the constructors plus saturating
// arithmetic, unsigned ordering, bit tests and per-lane shifts on top of
// base8_numeric<uint8_t>.
template <> struct simd8<uint8_t> : base8_numeric<uint8_t> {
  simdjson_really_inline simd8() : base8_numeric<uint8_t>() {}
  simdjson_really_inline simd8(const __m128i _value)
      : base8_numeric<uint8_t>(_value) {}
  // Splat constructor
  simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
  // Array constructor
  simdjson_really_inline simd8(const uint8_t *values) : simd8(load(values)) {}
  // Member-by-member initialization
  simdjson_really_inline
  simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5,
        uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10,
        uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15)
      : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
                        v13, v14, v15}) {}
  // Repeat 16 values as many times as necessary (usually for lookup tables)
  simdjson_really_inline static simd8<uint8_t>
  repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4,
            uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9,
            uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14,
            uint8_t v15) {
    return simd8<uint8_t>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
                          v13, v14, v15);
  }

  // Saturated math (vec_adds / vec_subs)
  simdjson_really_inline simd8<uint8_t>
  saturating_add(const simd8<uint8_t> other) const {
    return (__m128i)vec_adds(this->value, (__m128i)other);
  }
  simdjson_really_inline simd8<uint8_t>
  saturating_sub(const simd8<uint8_t> other) const {
    return (__m128i)vec_subs(this->value, (__m128i)other);
  }

  // Order-specific operations
  simdjson_really_inline simd8<uint8_t>
  max_val(const simd8<uint8_t> other) const {
    return (__m128i)vec_max(this->value, (__m128i)other);
  }
  simdjson_really_inline simd8<uint8_t>
  min_val(const simd8<uint8_t> other) const {
    return (__m128i)vec_min(this->value, (__m128i)other);
  }
  // Same as >, but only guarantees true is nonzero (> guarantees true = -1).
  // this - other, saturated at zero, is nonzero exactly where this > other.
  simdjson_really_inline simd8<uint8_t>
  gt_bits(const simd8<uint8_t> other) const {
    return this->saturating_sub(other);
  }
  // Same as <, but only guarantees true is nonzero (< guarantees true = -1).
  // other - this, saturated at zero, is nonzero exactly where this < other.
  simdjson_really_inline simd8<uint8_t>
  lt_bits(const simd8<uint8_t> other) const {
    return other.saturating_sub(*this);
  }
  // this <= other exactly where max(this, other) == other.
  simdjson_really_inline simd8<bool>
  operator<=(const simd8<uint8_t> other) const {
    return other.max_val(*this) == other;
  }
  // this >= other exactly where min(this, other) == other.
  simdjson_really_inline simd8<bool>
  operator>=(const simd8<uint8_t> other) const {
    return other.min_val(*this) == other;
  }
  simdjson_really_inline simd8<bool>
  operator>(const simd8<uint8_t> other) const {
    return this->gt_bits(other).any_bits_set();
  }
  // Must be built on lt_bits: using gt_bits here would make operator< return
  // the same mask as operator>.
  simdjson_really_inline simd8<bool>
  operator<(const simd8<uint8_t> other) const {
    return this->lt_bits(other).any_bits_set();
  }

  // Bit-specific operations
  // Lanes that are entirely zero.
  simdjson_really_inline simd8<bool> bits_not_set() const {
    return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0)));
  }
  // Lanes in which none of the bits selected by `bits` are set.
  simdjson_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const {
    return (*this & bits).bits_not_set();
  }
  // Lanes with at least one bit set.
  simdjson_really_inline simd8<bool> any_bits_set() const {
    return ~this->bits_not_set();
  }
  // Lanes in which at least one of the bits selected by `bits` is set.
  simdjson_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const {
    return ~this->bits_not_set(bits);
  }
  // Whole-vector tests: a single bool for all 16 lanes.
  simdjson_really_inline bool bits_not_set_anywhere() const {
    return vec_all_eq(this->value, (__m128i)vec_splats(0));
  }
  simdjson_really_inline bool any_bits_set_anywhere() const {
    return !bits_not_set_anywhere();
  }
  simdjson_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const {
    return vec_all_eq(vec_and(this->value, (__m128i)bits),
                      (__m128i)vec_splats(0));
  }
  simdjson_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const {
    return !bits_not_set_anywhere(bits);
  }
  // Per-lane shifts by the compile-time amount N (vec_sr / vec_sl).
  template <int N> simdjson_really_inline simd8<uint8_t> shr() const {
    return simd8<uint8_t>(
        (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N)));
  }
  template <int N> simdjson_really_inline simd8<uint8_t> shl() const {
    return simd8<uint8_t>(
        (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N)));
  }
};
// A 64-byte block held as NUM_CHUNKS (four) simd8<T> registers, with helpers
// that operate on the whole block and produce 64-bit masks.
template <typename T> struct simd8x64 {
  static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
  static_assert(NUM_CHUNKS == 4,
                "PPC64 kernel should use four registers per 64-byte block.");
  const simd8<T> chunks[NUM_CHUNKS];

  simd8x64(const simd8x64<T> &o) = delete; // no copy allowed
  simd8x64<T> &
  operator=(const simd8<T> other) = delete; // no assignment allowed
  simd8x64() = delete;                      // no default constructor allowed

  // Build the block from four already-loaded chunks, in order.
  simdjson_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1,
                                  const simd8<T> chunk2, const simd8<T> chunk3)
      : chunks{chunk0, chunk1, chunk2, chunk3} {}
  // Load the block from 64 consecutive elements starting at ptr.
  simdjson_really_inline simd8x64(const T ptr[64])
      : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr + 16),
               simd8<T>::load(ptr + 32), simd8<T>::load(ptr + 48)} {}

  // Write the four chunks back to ptr, one after another.
  simdjson_really_inline void store(T ptr[64]) const {
    this->chunks[0].store(ptr + sizeof(simd8<T>) * 0);
    this->chunks[1].store(ptr + sizeof(simd8<T>) * 1);
    this->chunks[2].store(ptr + sizeof(simd8<T>) * 2);
    this->chunks[3].store(ptr + sizeof(simd8<T>) * 3);
  }

  // OR the four chunks together into a single register.
  simdjson_really_inline simd8<T> reduce_or() const {
    return (this->chunks[0] | this->chunks[1]) |
           (this->chunks[2] | this->chunks[3]);
  }

  // Compress each chunk with its own 16-bit slice of mask. Every later chunk
  // starts writing count_ones(lower mask bits) positions earlier, i.e. right
  // behind what the preceding chunks kept.
  simdjson_really_inline void compress(uint64_t mask, T *output) const {
    this->chunks[0].compress(uint16_t(mask), output);
    this->chunks[1].compress(uint16_t(mask >> 16),
                             output + 16 - count_ones(mask & 0xFFFF));
    this->chunks[2].compress(uint16_t(mask >> 32),
                             output + 32 - count_ones(mask & 0xFFFFFFFF));
    this->chunks[3].compress(uint16_t(mask >> 48),
                             output + 48 - count_ones(mask & 0xFFFFFFFFFFFF));
  }

  // Combine the per-chunk bitmasks into one 64-bit mask: chunk k supplies
  // bits 16*k .. 16*k+15.
  simdjson_really_inline uint64_t to_bitmask() const {
    uint64_t r0 = uint32_t(this->chunks[0].to_bitmask());
    uint64_t r1 = this->chunks[1].to_bitmask();
    uint64_t r2 = this->chunks[2].to_bitmask();
    uint64_t r3 = this->chunks[3].to_bitmask();
    return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
  }

  // Bitmask of the positions whose value equals m.
  simdjson_really_inline uint64_t eq(const T m) const {
    const simd8<T> mask = simd8<T>::splat(m);
    return simd8x64<bool>(this->chunks[0] == mask, this->chunks[1] == mask,
                          this->chunks[2] == mask, this->chunks[3] == mask)
        .to_bitmask();
  }

  // Bitmask of the positions at which this block and other hold equal values.
  simdjson_really_inline uint64_t eq(const simd8x64<uint8_t> &other) const {
    return simd8x64<bool>(this->chunks[0] == other.chunks[0],
                          this->chunks[1] == other.chunks[1],
                          this->chunks[2] == other.chunks[2],
                          this->chunks[3] == other.chunks[3])
        .to_bitmask();
  }

  // Bitmask of the positions whose value is less than or equal to m.
  simdjson_really_inline uint64_t lteq(const T m) const {
    const simd8<T> mask = simd8<T>::splat(m);
    return simd8x64<bool>(this->chunks[0] <= mask, this->chunks[1] <= mask,
                          this->chunks[2] <= mask, this->chunks[3] <= mask)
        .to_bitmask();
  }
}; // struct simd8x64<T>
} // namespace simd
} // unnamed namespace
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace simdjson
#endif // SIMDJSON_PPC64_SIMD_INPUT_H

View File

@ -0,0 +1,65 @@
#ifndef SIMDJSON_PPC64_STRINGPARSING_H
#define SIMDJSON_PPC64_STRINGPARSING_H
#include "simdjson.h"
#include "simdjson/ppc64/bitmanipulation.h"
#include "simdjson/ppc64/simd.h"
namespace simdjson {
namespace SIMDJSON_IMPLEMENTATION {
namespace {
using namespace simd;
// Bitmasks recording where backslashes and double quotes were found in the
// bytes handled by one copy_and_find() call.
struct backslash_and_quote {
public:
  // Number of input bytes consumed per copy_and_find() call.
  static constexpr uint32_t BYTES_PROCESSED = 32;

  // Copies BYTES_PROCESSED bytes from src to dst and reports the backslash and
  // quote positions found in them.
  simdjson_really_inline static backslash_and_quote
  copy_and_find(const uint8_t *src, uint8_t *dst);

  // True when some quote bit lies below the lowest backslash bit. With no
  // backslash at all, bs_bits - 1 wraps to all ones, so any quote qualifies.
  simdjson_really_inline bool has_quote_first() {
    const uint32_t below_first_backslash = bs_bits - 1;
    return (below_first_backslash & quote_bits) != 0;
  }
  // True when at least one backslash was seen.
  simdjson_really_inline bool has_backslash() { return 0 != bs_bits; }
  // Bit index of the lowest quote bit.
  simdjson_really_inline int quote_index() {
    const uint32_t quotes = quote_bits;
    return trailing_zeroes(quotes);
  }
  // Bit index of the lowest backslash bit.
  simdjson_really_inline int backslash_index() {
    const uint32_t backslashes = bs_bits;
    return trailing_zeroes(backslashes);
  }

  uint32_t bs_bits;
  uint32_t quote_bits;
}; // struct backslash_and_quote
// Copy BYTES_PROCESSED (32) bytes from src to dst as two 16-byte vectors and
// return bitmasks of the backslashes and double quotes they contain.
simdjson_really_inline backslash_and_quote
backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
  // this can read up to 31 bytes beyond the buffer size, but we require
  // SIMDJSON_PADDING of padding
  static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1),
                "backslash and quote finder must process fewer than "
                "SIMDJSON_PADDING bytes");
  const simd8<uint8_t> first_half(src);
  const simd8<uint8_t> second_half(src + sizeof(simd8<uint8_t>));
  first_half.store(dst);
  second_half.store(dst + sizeof(simd8<uint8_t>));

  // A single 64-bit bitmask is much cheaper than several 16-bit bitmasks on
  // PPC, so the four comparison results are packed into one 64-byte block:
  // the backslash results land in the low 32 bits and the quote results in
  // the high 32 bits.
  const uint64_t combined =
      simd8x64<bool>(first_half == '\\', second_half == '\\',
                     first_half == '"', second_half == '"')
          .to_bitmask();
  const uint32_t backslash_mask = uint32_t(combined);
  const uint32_t quote_mask = uint32_t(combined >> 32);
  return {
      backslash_mask, // bs_bits
      quote_mask      // quote_bits
  };
}
} // unnamed namespace
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace simdjson
#include "simdjson/generic/stringparsing.h"
#endif // SIMDJSON_PPC64_STRINGPARSING_H

View File

@ -23,6 +23,9 @@ const westmere::implementation westmere_singleton{};
// One statically constructed implementation object per kernel; each is only
// defined when its SIMDJSON_IMPLEMENTATION_* macro is enabled.
#if SIMDJSON_IMPLEMENTATION_ARM64
const arm64::implementation arm64_singleton{};
#endif // SIMDJSON_IMPLEMENTATION_ARM64
// PPC64 kernel singleton.
#if SIMDJSON_IMPLEMENTATION_PPC64
const ppc64::implementation ppc64_singleton{};
#endif // SIMDJSON_IMPLEMENTATION_PPC64
// Fallback kernel singleton.
#if SIMDJSON_IMPLEMENTATION_FALLBACK
const fallback::implementation fallback_singleton{};
#endif // SIMDJSON_IMPLEMENTATION_FALLBACK
@ -65,6 +68,9 @@ const std::initializer_list<const implementation *> available_implementation_poi
#if SIMDJSON_IMPLEMENTATION_ARM64
&arm64_singleton,
#endif
#if SIMDJSON_IMPLEMENTATION_PPC64
&ppc64_singleton,
#endif
#if SIMDJSON_IMPLEMENTATION_FALLBACK
&fallback_singleton,
#endif

View File

@ -1,4 +1,4 @@
#if SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE
#if SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64
#include <cstdint>
@ -23,16 +23,16 @@ SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256] = {
SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x00, 0x01, 0x02, 0x03,
0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0x00, 0x01, 0x02, 0x03,
0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
0x00, 0x01, 0x02, 0x03, 0x04, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
0x0f, 0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, 0x80,
0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
0x0f, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x08, 0x09, 0x0a, 0x0b,
0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x0f, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff,
0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x08, 0x09, 0x0a, 0x0b,
0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
// 256 * 8 bytes = 2kB, easily fits in cache.
@ -126,6 +126,6 @@ SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256] = {
}; //static uint64_t thintable_epi8[256]
} // namespace internal
} // namespace simdjson
} // namespace simdjson
#endif // SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE
#endif // SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64

View File

@ -0,0 +1,133 @@
#include "simdjson/ppc64/begin.h"
//
// Stage 1
//
namespace simdjson {
namespace SIMDJSON_IMPLEMENTATION {
namespace {
using namespace simd;
// Classification of one 64-byte block: each member is a 64-bit mask produced
// by classify().
struct json_character_block {
  // Classify the 64 bytes of `in` into whitespace and operator masks.
  static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);

  // Mask of the bytes classified as whitespace.
  simdjson_really_inline uint64_t whitespace() const { return _whitespace; }
  // Mask of the bytes classified as operators.
  simdjson_really_inline uint64_t op() const { return _op; }
  // Mask of everything that is neither an operator nor whitespace. Declared
  // const like the other accessors, since it only reads through them.
  simdjson_really_inline uint64_t scalar() const { return ~(op() | whitespace()); }

  uint64_t _whitespace;
  uint64_t _op;
};
// Classify every byte of a 64-byte block with two 16-entry table lookups, one
// indexed by the low nibble and one by the high nibble. ANDing both lookups
// gives a per-byte class code: bits 0x7 mark an operator, bits 0x18 mark
// whitespace. For example ',' (0x2c) yields 2 & 18 == 2 and ' ' (0x20) yields
// 16 & 18 == 16.
simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) {
  const simd8<uint8_t> low_nibble_table(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
  const simd8<uint8_t> high_nibble_table(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0);

  // Class codes for each of the four 16-byte chunks.
  const simd8<uint8_t> class0 = (in.chunks[0] & 0xf).lookup_16(low_nibble_table) & (in.chunks[0].shr<4>()).lookup_16(high_nibble_table);
  const simd8<uint8_t> class1 = (in.chunks[1] & 0xf).lookup_16(low_nibble_table) & (in.chunks[1].shr<4>()).lookup_16(high_nibble_table);
  const simd8<uint8_t> class2 = (in.chunks[2] & 0xf).lookup_16(low_nibble_table) & (in.chunks[2].shr<4>()).lookup_16(high_nibble_table);
  const simd8<uint8_t> class3 = (in.chunks[3] & 0xf).lookup_16(low_nibble_table) & (in.chunks[3].shr<4>()).lookup_16(high_nibble_table);
  simd8x64<uint8_t> classes(class0, class1, class2, class3);

  // Operators: any of the low three class bits set.
  const uint64_t op_mask = simd8x64<bool>(
    classes.chunks[0].any_bits_set(0x7),
    classes.chunks[1].any_bits_set(0x7),
    classes.chunks[2].any_bits_set(0x7),
    classes.chunks[3].any_bits_set(0x7)
  ).to_bitmask();

  // Whitespace: either of the 0x18 class bits set.
  const uint64_t whitespace_mask = simd8x64<bool>(
    classes.chunks[0].any_bits_set(0x18),
    classes.chunks[1].any_bits_set(0x18),
    classes.chunks[2].any_bits_set(0x18),
    classes.chunks[3].any_bits_set(0x18)
  ).to_bitmask();

  return { whitespace_mask, op_mask };
}
// Returns true when every byte of the 64-byte block is ASCII (<= 0x7f).
// The four chunks are ORed together; a lane above 0x7f means at least one
// contributing byte had its top bit set.
simdjson_really_inline bool is_ascii(const simd8x64<uint8_t>& input) {
  // Careful: 0x80 is not ASCII. Subtracting 0x7f with saturation leaves zero
  // only for lanes <= 0x7f; subtracting 0x80 would also accept 0x80 itself.
  return input.reduce_or().saturating_sub(0b01111111u).bits_not_set_anywhere();
}
// Flags the positions that must hold a UTF-8 continuation byte, given the
// input shifted back by one, two and three bytes (prev1, prev2, prev3).
simdjson_unused simdjson_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
  // Each saturating subtraction stays positive only for the lead-byte pattern
  // named on its line.
  const simd8<uint8_t> after_lead_of_2_plus = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0
  const simd8<uint8_t> after_lead_of_3_plus = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
  const simd8<uint8_t> after_lead_of_4      = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
  const simd8<uint8_t> any_lead = after_lead_of_2_plus | after_lead_of_3_plus | after_lead_of_4;
  // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine.
  return simd8<int8_t>(any_lead) > int8_t(0);
}
// Flags the positions that must hold the second or third continuation byte of
// a 3- or 4-byte UTF-8 sequence, given the input shifted back by two and
// three bytes (prev2, prev3).
simdjson_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
  const simd8<uint8_t> after_lead_of_3_plus = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
  const simd8<uint8_t> after_lead_of_4      = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
  const simd8<uint8_t> any_lead = after_lead_of_3_plus | after_lead_of_4;
  // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine.
  return simd8<int8_t>(any_lead) > int8_t(0);
}
} // unnamed namespace
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace simdjson
#include "generic/stage1/utf8_lookup4_algorithm.h"
#include "generic/stage1/json_structural_indexer.h"
#include "generic/stage1/utf8_validator.h"
//
// Stage 2
//
#include "generic/stage2/tape_builder.h"
//
// Implementation-specific overrides
//
namespace simdjson {
namespace SIMDJSON_IMPLEMENTATION {
namespace {
namespace stage1 {
// PPC64 override: always compute the escaped-character mask through the
// branchless routine.
simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) {
  // The short-circuit for "no backslashes" is intentionally left disabled on
  // PPC: the branch gives us no benefit and therefore makes things worse.
  // if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; }
  const uint64_t escaped_mask = find_escaped_branchless(backslash);
  return escaped_mask;
}
} // namespace stage1
} // unnamed namespace
// Minify buf[0..len) into dst by delegating to the generic stage-1 minifier,
// instantiated with a step size of 64; dst_len is passed through to it.
simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
  const error_code result = ppc64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
  return result;
}
// Stage 1: record the input buffer on the parser, then run the generic
// structural indexer over it with a step size of 64.
simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
  buf = _buf;
  len = _len;
  const error_code result = ppc64::stage1::json_structural_indexer::index<64>(this->buf, this->len, *this, streaming);
  return result;
}
// Validate buf[0..len) as UTF-8 by delegating to the generic stage-1
// validator.
simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  const bool is_valid = ppc64::stage1::generic_validate_utf8(buf, len);
  return is_valid;
}
// Stage 2: build the tape for _doc through the generic tape builder,
// instantiated with its template flag set to false.
simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
  const error_code result = stage2::tape_builder::parse_document<false>(*this, _doc);
  return result;
}
// Same as stage2(), but instantiates the generic tape builder with its
// template flag set to true.
simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept {
  const error_code result = stage2::tape_builder::parse_document<true>(*this, _doc);
  return result;
}
// Full parse: run stage 1 (non-streaming) on the buffer and, only if it
// succeeds, run stage 2 into _doc. A stage-1 error is returned unchanged.
simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
  const auto stage1_error = stage1(_buf, _len, false);
  if (!stage1_error) {
    return stage2(_doc);
  }
  return stage1_error;
}
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace simdjson
#include "simdjson/ppc64/end.h"

View File

@ -0,0 +1,21 @@
#include "simdjson/ppc64/begin.h"
namespace simdjson {
namespace SIMDJSON_IMPLEMENTATION {
// Allocate a PPC64 dom_parser_implementation into dst and size it.
//
// capacity and max_depth are forwarded to the new parser's set_capacity() and
// set_max_depth(). Returns MEMALLOC when the parser object itself cannot be
// allocated, the error reported by either setter when sizing fails, and
// SUCCESS otherwise.
simdjson_warn_unused error_code implementation::create_dom_parser_implementation(
  size_t capacity,
  size_t max_depth,
  std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept {
  // nothrow new: a failed allocation shows up as an empty dst, not an exception.
  dst.reset( new (std::nothrow) dom_parser_implementation() );
  if (!dst) { return MEMALLOC; }
  // Propagate sizing failures instead of silently reporting SUCCESS.
  // NOTE(review): assumes set_capacity()/set_max_depth() return error_code - confirm.
  const error_code capacity_error = dst->set_capacity(capacity);
  if (capacity_error) { return capacity_error; }
  const error_code depth_error = dst->set_max_depth(max_depth);
  if (depth_error) { return depth_error; }
  return SUCCESS;
}
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace simdjson
#include "simdjson/ppc64/end.h"

View File

@ -23,6 +23,10 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
#include "haswell/implementation.cpp"
#include "haswell/dom_parser_implementation.cpp"
#endif
#if SIMDJSON_IMPLEMENTATION_PPC64
#include "ppc64/implementation.cpp"
#include "ppc64/dom_parser_implementation.cpp"
#endif
#if SIMDJSON_IMPLEMENTATION_WESTMERE
#include "westmere/implementation.cpp"
#include "westmere/dom_parser_implementation.cpp"