Move architecture-specific headers to src/ (#287)
* Use namespaces instead of templates for stage1 impls * Move stage1 implementation into the src/ directory * Move architecture-specific code to src/
This commit is contained in:
parent
a1bff85263
commit
585f84a734
|
@ -7,10 +7,14 @@
|
||||||
# Build outputs (TODO build to a subdir so we can exclude that instead)
|
# Build outputs (TODO build to a subdir so we can exclude that instead)
|
||||||
/allparserscheckfile
|
/allparserscheckfile
|
||||||
/basictests
|
/basictests
|
||||||
|
/benchmark/parse
|
||||||
|
/benchmark/perfdiff
|
||||||
|
/benchmark/statisticalmodel
|
||||||
/json2json
|
/json2json
|
||||||
/jsoncheck
|
/jsoncheck
|
||||||
/jsonpointer
|
/jsonpointer
|
||||||
/jsonstats
|
/jsonstats
|
||||||
|
/libsimdjson.so*
|
||||||
/minify
|
/minify
|
||||||
/numberparsingcheck
|
/numberparsingcheck
|
||||||
/parse
|
/parse
|
||||||
|
@ -25,8 +29,33 @@
|
||||||
/simdjson.h
|
/simdjson.h
|
||||||
/singleheader/amalgamation_demo
|
/singleheader/amalgamation_demo
|
||||||
/singleheader/demo
|
/singleheader/demo
|
||||||
|
/tests/basictests
|
||||||
|
/tests/jsoncheck
|
||||||
|
/tests/pointercheck
|
||||||
|
/tools/json2json
|
||||||
|
/tools/jsonstats
|
||||||
|
/tools/minify
|
||||||
|
# CMake ignore from https://github.com/github/gitignore/blob/master/CMake.gitignore
|
||||||
|
|
||||||
# Generic from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore
|
CMakeLists.txt.user
|
||||||
|
CMakeCache.txt
|
||||||
|
CMakeFiles
|
||||||
|
CMakeScripts
|
||||||
|
Testing
|
||||||
|
Makefile
|
||||||
|
cmake_install.cmake
|
||||||
|
install_manifest.txt
|
||||||
|
compile_commands.json
|
||||||
|
CTestTestfile.cmake
|
||||||
|
_deps
|
||||||
|
|
||||||
|
# CMake files that may be specific to our installation
|
||||||
|
/CPackConfig.cmake
|
||||||
|
/CPackSourceConfig.cmake
|
||||||
|
# We check in a custom version of root Makefile that is not generated by CMake
|
||||||
|
!/Makefile
|
||||||
|
|
||||||
|
# C++ ignore from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore
|
||||||
|
|
||||||
# Prerequisites
|
# Prerequisites
|
||||||
*.d
|
*.d
|
||||||
|
|
8
Makefile
8
Makefile
|
@ -22,7 +22,7 @@ else
|
||||||
ARCHFLAGS ?= -msse4.2 -mpclmul # lowest supported feature set?
|
ARCHFLAGS ?= -msse4.2 -mpclmul # lowest supported feature set?
|
||||||
endif
|
endif
|
||||||
|
|
||||||
CXXFLAGS = $(ARCHFLAGS) -std=c++17 -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux $(EXTRAFLAGS)
|
CXXFLAGS = $(ARCHFLAGS) -std=c++17 -Wall -Wextra -Wshadow -Iinclude -Isrc -Ibenchmark/linux $(EXTRAFLAGS)
|
||||||
CFLAGS = $(ARCHFLAGS) -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src $(EXTRAFLAGS)
|
CFLAGS = $(ARCHFLAGS) -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src $(EXTRAFLAGS)
|
||||||
|
|
||||||
|
|
||||||
|
@ -63,7 +63,11 @@ TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck pointercheck
|
||||||
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile allparsingcompetition
|
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile allparsingcompetition
|
||||||
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
|
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
|
||||||
|
|
||||||
HEADERS= include/simdjson/simdutf8check_haswell.h include/simdjson/simdutf8check_westmere.h include/simdjson/simdutf8check_arm64.h include/simdjson/stringparsing.h include/simdjson/stringparsing_arm64.h include/simdjson/stringparsing_haswell.h include/simdjson/stringparsing_westmere.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage1_find_marks_arm64.h include/simdjson/stage1_find_marks_haswell.h include/simdjson/stage1_find_marks_westmere.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h include/simdjson/stage1_find_marks_flatten_common.h include/simdjson/stage1_find_marks_flatten_haswell.h
|
# Load headers and sources
|
||||||
|
LIBHEADERS=src/simd_input.h src/simdutf8check.h src/stringparsing.h src/arm64/architecture.h src/arm64/simd_input.h src/arm64/simdutf8check.h src/arm64/stage1_find_marks.h src/arm64/stage2_build_tape.h src/arm64/stringparsing.h src/generic/stage1_find_marks_flatten.h src/generic/stage1_find_marks.h src/generic/stage2_build_tape.h src/generic/stringparsing.h src/haswell/architecture.h src/haswell/simd_input.h src/haswell/simdutf8check.h src/haswell/stage1_find_marks.h src/haswell/stage2_build_tape.h src/haswell/stringparsing.h src/westmere/architecture.h src/westmere/simd_input.h src/westmere/simdutf8check.h src/westmere/stage1_find_marks.h src/westmere/stage2_build_tape.h src/westmere/stringparsing.h
|
||||||
|
PUBHEADERS=include/simdjson/common_defs.h include/simdjson/isadetection.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/numberparsing.h include/simdjson/padded_string.h include/simdjson/parsedjson.h include/simdjson/parsedjsoniterator.h include/simdjson/portability.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h include/simdjson/simdprune_tables.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h
|
||||||
|
HEADERS=$(PUBHEADERS) $(LIBHEADERS)
|
||||||
|
|
||||||
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/parsedjson.cpp src/parsedjsoniterator.cpp
|
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/parsedjson.cpp src/parsedjsoniterator.cpp
|
||||||
MINIFIERHEADERS=include/simdjson/jsonminifier.h include/simdjson/simdprune_tables.h
|
MINIFIERHEADERS=include/simdjson/jsonminifier.h include/simdjson/simdprune_tables.h
|
||||||
MINIFIERLIBFILES=src/jsonminifier.cpp
|
MINIFIERLIBFILES=src/jsonminifier.cpp
|
||||||
|
|
|
@ -17,8 +17,28 @@ $SCRIPTPATH/src/simdjson.cpp
|
||||||
$SCRIPTPATH/src/jsonioutil.cpp
|
$SCRIPTPATH/src/jsonioutil.cpp
|
||||||
$SCRIPTPATH/src/jsonminifier.cpp
|
$SCRIPTPATH/src/jsonminifier.cpp
|
||||||
$SCRIPTPATH/src/jsonparser.cpp
|
$SCRIPTPATH/src/jsonparser.cpp
|
||||||
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten_haswell.h
|
$SCRIPTPATH/src/simd_input.h
|
||||||
|
$SCRIPTPATH/src/arm64/architecture.h
|
||||||
|
$SCRIPTPATH/src/haswell/architecture.h
|
||||||
|
$SCRIPTPATH/src/westmere/architecture.h
|
||||||
|
$SCRIPTPATH/src/arm64/simd_input.h
|
||||||
|
$SCRIPTPATH/src/haswell/simd_input.h
|
||||||
|
$SCRIPTPATH/src/westmere/simd_input.h
|
||||||
|
$SCRIPTPATH/src/simdutf8check.h
|
||||||
|
$SCRIPTPATH/src/arm64/simdutf8check.h
|
||||||
|
$SCRIPTPATH/src/haswell/simdutf8check.h
|
||||||
|
$SCRIPTPATH/src/westmere/simdutf8check.h
|
||||||
|
$SCRIPTPATH/src/arm64/stage1_find_marks.h
|
||||||
|
$SCRIPTPATH/src/haswell/stage1_find_marks.h
|
||||||
|
$SCRIPTPATH/src/westmere/stage1_find_marks.h
|
||||||
$SCRIPTPATH/src/stage1_find_marks.cpp
|
$SCRIPTPATH/src/stage1_find_marks.cpp
|
||||||
|
$SCRIPTPATH/src/stringparsing.h
|
||||||
|
$SCRIPTPATH/src/arm64/stringparsing.h
|
||||||
|
$SCRIPTPATH/src/haswell/stringparsing.h
|
||||||
|
$SCRIPTPATH/src/westmere/stringparsing.h
|
||||||
|
$SCRIPTPATH/src/arm64/stage2_build_tape.h
|
||||||
|
$SCRIPTPATH/src/haswell/stage2_build_tape.h
|
||||||
|
$SCRIPTPATH/src/westmere/stage2_build_tape.h
|
||||||
$SCRIPTPATH/src/stage2_build_tape.cpp
|
$SCRIPTPATH/src/stage2_build_tape.cpp
|
||||||
$SCRIPTPATH/src/parsedjson.cpp
|
$SCRIPTPATH/src/parsedjson.cpp
|
||||||
$SCRIPTPATH/src/parsedjsoniterator.cpp
|
$SCRIPTPATH/src/parsedjsoniterator.cpp
|
||||||
|
@ -36,25 +56,10 @@ $SCRIPTPATH/include/simdjson/jsoncharutils.h
|
||||||
$SCRIPTPATH/include/simdjson/jsonformatutils.h
|
$SCRIPTPATH/include/simdjson/jsonformatutils.h
|
||||||
$SCRIPTPATH/include/simdjson/jsonioutil.h
|
$SCRIPTPATH/include/simdjson/jsonioutil.h
|
||||||
$SCRIPTPATH/include/simdjson/simdprune_tables.h
|
$SCRIPTPATH/include/simdjson/simdprune_tables.h
|
||||||
$SCRIPTPATH/include/simdjson/simd_input.h
|
|
||||||
$SCRIPTPATH/include/simdjson/simd_input_haswell.h
|
|
||||||
$SCRIPTPATH/include/simdjson/simd_input_westmere.h
|
|
||||||
$SCRIPTPATH/include/simdjson/simd_input_arm64.h
|
|
||||||
$SCRIPTPATH/include/simdjson/simdutf8check.h
|
|
||||||
$SCRIPTPATH/include/simdjson/simdutf8check_haswell.h
|
|
||||||
$SCRIPTPATH/include/simdjson/simdutf8check_westmere.h
|
|
||||||
$SCRIPTPATH/include/simdjson/simdutf8check_arm64.h
|
|
||||||
$SCRIPTPATH/include/simdjson/jsonminifier.h
|
$SCRIPTPATH/include/simdjson/jsonminifier.h
|
||||||
$SCRIPTPATH/include/simdjson/parsedjson.h
|
$SCRIPTPATH/include/simdjson/parsedjson.h
|
||||||
$SCRIPTPATH/include/simdjson/parsedjsoniterator.h
|
$SCRIPTPATH/include/simdjson/parsedjsoniterator.h
|
||||||
$SCRIPTPATH/include/simdjson/stage1_find_marks.h
|
$SCRIPTPATH/include/simdjson/stage1_find_marks.h
|
||||||
$SCRIPTPATH/include/simdjson/stage1_find_marks_westmere.h
|
|
||||||
$SCRIPTPATH/include/simdjson/stage1_find_marks_haswell.h
|
|
||||||
$SCRIPTPATH/include/simdjson/stage1_find_marks_arm64.h
|
|
||||||
$SCRIPTPATH/include/simdjson/stringparsing.h
|
|
||||||
$SCRIPTPATH/include/simdjson/stringparsing_westmere.h
|
|
||||||
$SCRIPTPATH/include/simdjson/stringparsing_haswell.h
|
|
||||||
$SCRIPTPATH/include/simdjson/stringparsing_arm64.h
|
|
||||||
$SCRIPTPATH/include/simdjson/numberparsing.h
|
$SCRIPTPATH/include/simdjson/numberparsing.h
|
||||||
$SCRIPTPATH/include/simdjson/stage2_build_tape.h
|
$SCRIPTPATH/include/simdjson/stage2_build_tape.h
|
||||||
$SCRIPTPATH/include/simdjson/jsonparser.h
|
$SCRIPTPATH/include/simdjson/jsonparser.h
|
||||||
|
@ -74,17 +79,27 @@ function dofile()
|
||||||
# echo "#line 8 \"$1\"" ## redefining the line/file is not nearly as useful as it sounds for debugging. It breaks IDEs.
|
# echo "#line 8 \"$1\"" ## redefining the line/file is not nearly as useful as it sounds for debugging. It breaks IDEs.
|
||||||
while IFS= read -r line
|
while IFS= read -r line
|
||||||
do
|
do
|
||||||
if [[ "${line}" == '#include "simdjson'* ]]; then
|
if [[ "${line}" == '#include "'*'"'* ]]; then
|
||||||
# we paste the contents of simdjson header files with names ending by _common.h
|
file=$(echo $line| cut -d'"' -f 2)
|
||||||
# we ignore all other simdjson headers
|
|
||||||
if [[ "${line}" == '#include "simdjson/'*'_common.h"'* ]]; then
|
if [[ "${file}" == '../'* ]]; then
|
||||||
file=$(echo $line| cut -d'"' -f 2)
|
file=$(echo $file| cut -d'/' -f 2-)
|
||||||
echo "$(<include/$file)" # we assume those files are always in include/
|
fi;
|
||||||
fi
|
|
||||||
else
|
# we ignore simdjson headers (except src/generic/*.h); they are handled in the above list
|
||||||
# Otherwise we simply copy the line
|
if [ -f include/$file ]; then
|
||||||
echo "$line"
|
continue;
|
||||||
|
elif [ -f src/$file ]; then
|
||||||
|
# we paste the contents of src/generic/*.h
|
||||||
|
if [[ "${file}" == *'generic/'*'.h' ]]; then
|
||||||
|
echo "$(<src/$file)"
|
||||||
|
fi;
|
||||||
|
continue;
|
||||||
|
fi;
|
||||||
fi;
|
fi;
|
||||||
|
|
||||||
|
# Otherwise we simply copy the line
|
||||||
|
echo "$line"
|
||||||
done < "$1"
|
done < "$1"
|
||||||
echo "/* end file $RELFILE */"
|
echo "/* end file $RELFILE */"
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,8 +88,7 @@ int unified_machine_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Responsible to select the best json_parse implementation
|
// Responsible to select the best json_parse implementation
|
||||||
int find_structural_bits_dispatch(const uint8_t *buf, size_t len,
|
int find_structural_bits_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||||
ParsedJson &pj) {
|
|
||||||
Architecture best_implementation = _find_best_supported_implementation();
|
Architecture best_implementation = _find_best_supported_implementation();
|
||||||
// Selecting the best implementation
|
// Selecting the best implementation
|
||||||
switch (best_implementation) {
|
switch (best_implementation) {
|
||||||
|
|
|
@ -1,35 +1,20 @@
|
||||||
set(SIMDJSON_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include/simdjson)
|
set(SIMDJSON_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
|
||||||
set(SIMDJSON_INCLUDE
|
set(SIMDJSON_INCLUDE
|
||||||
${SIMDJSON_INCLUDE_DIR}/common_defs.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/common_defs.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/isadetection.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/isadetection.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/jsoncharutils.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/jsoncharutils.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/jsonformatutils.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonformatutils.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/jsonioutil.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonioutil.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/jsonminifier.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonminifier.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/jsonparser.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonparser.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/numberparsing.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/numberparsing.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/padded_string.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/padded_string.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/parsedjson.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/parsedjson.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/parsedjsoniterator.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/parsedjsoniterator.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/portability.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/portability.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/simdjson.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/simdjson.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/simdjson_version.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/simdjson_version.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/simdprune_tables.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/simdprune_tables.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/simdutf8check_arm64.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/stage1_find_marks.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/simdutf8check_haswell.h
|
${SIMDJSON_INCLUDE_DIR}/simdjson/stage2_build_tape.h
|
||||||
${SIMDJSON_INCLUDE_DIR}/simdutf8check_westmere.h
|
|
||||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks.h
|
|
||||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_arm64.h
|
|
||||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_common.h
|
|
||||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten_common.h
|
|
||||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten_haswell.h
|
|
||||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_haswell.h
|
|
||||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_westmere.h
|
|
||||||
${SIMDJSON_INCLUDE_DIR}/stage2_build_tape.h
|
|
||||||
${SIMDJSON_INCLUDE_DIR}/stage2_build_tape_common.h
|
|
||||||
${SIMDJSON_INCLUDE_DIR}/stringparsing.h
|
|
||||||
${SIMDJSON_INCLUDE_DIR}/stringparsing_arm64.h
|
|
||||||
${SIMDJSON_INCLUDE_DIR}/stringparsing_common.h
|
|
||||||
${SIMDJSON_INCLUDE_DIR}/stringparsing_haswell.h
|
|
||||||
${SIMDJSON_INCLUDE_DIR}/stringparsing_westmere.h
|
|
||||||
)
|
)
|
|
@ -1,108 +1,19 @@
|
||||||
#ifndef SIMDJSON_STAGE1_FIND_MARKS_H
|
#ifndef SIMDJSON_STAGE1_FIND_MARKS_H
|
||||||
#define SIMDJSON_STAGE1_FIND_MARKS_H
|
#define SIMDJSON_STAGE1_FIND_MARKS_H
|
||||||
|
|
||||||
#include "simdjson/common_defs.h"
|
|
||||||
#include "simdjson/parsedjson.h"
|
#include "simdjson/parsedjson.h"
|
||||||
#include "simdjson/portability.h"
|
|
||||||
#include "simdjson/simdjson.h"
|
#include "simdjson/simdjson.h"
|
||||||
#include "simdjson/simd_input.h"
|
|
||||||
#include <cassert>
|
|
||||||
|
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
|
|
||||||
template <Architecture> uint64_t compute_quote_mask(uint64_t quote_bits);
|
template <Architecture T = Architecture::NATIVE>
|
||||||
|
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj);
|
||||||
namespace {
|
|
||||||
// for when clmul is unavailable
|
|
||||||
[[maybe_unused]] uint64_t portable_compute_quote_mask(uint64_t quote_bits) {
|
|
||||||
uint64_t quote_mask = quote_bits ^ (quote_bits << 1);
|
|
||||||
quote_mask = quote_mask ^ (quote_mask << 2);
|
|
||||||
quote_mask = quote_mask ^ (quote_mask << 4);
|
|
||||||
quote_mask = quote_mask ^ (quote_mask << 8);
|
|
||||||
quote_mask = quote_mask ^ (quote_mask << 16);
|
|
||||||
quote_mask = quote_mask ^ (quote_mask << 32);
|
|
||||||
return quote_mask;
|
|
||||||
}
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
template <Architecture T>
|
|
||||||
really_inline uint64_t find_odd_backslash_sequences(
|
|
||||||
simd_input<T> in, uint64_t &prev_iter_ends_odd_backslash);
|
|
||||||
|
|
||||||
template <Architecture T>
|
|
||||||
really_inline uint64_t find_quote_mask_and_bits(
|
|
||||||
simd_input<T> in, uint64_t odd_ends, uint64_t &prev_iter_inside_quote,
|
|
||||||
uint64_t &quote_bits, uint64_t &error_mask);
|
|
||||||
|
|
||||||
// do a 'shufti' to detect structural JSON characters
|
|
||||||
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
|
|
||||||
// these go into the first 3 buckets of the comparison (1/2/4)
|
|
||||||
|
|
||||||
// we are also interested in the four whitespace characters
|
|
||||||
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
|
|
||||||
// these go into the next 2 buckets of the comparison (8/16)
|
|
||||||
template <Architecture T>
|
|
||||||
void find_whitespace_and_structurals(simd_input<T> in, uint64_t &whitespace,
|
|
||||||
uint64_t &structurals);
|
|
||||||
|
|
||||||
// return an updated structural bit vector with quoted contents cleared out and
|
|
||||||
// pseudo-structural characters added to the mask
|
|
||||||
// updates prev_iter_ends_pseudo_pred which tells us whether the previous
|
|
||||||
// iteration ended on a whitespace or a structural character (which means that
|
|
||||||
// the next iteration
|
|
||||||
// will have a pseudo-structural character at its start)
|
|
||||||
really_inline uint64_t finalize_structurals(
|
|
||||||
uint64_t structurals, uint64_t whitespace, uint64_t quote_mask,
|
|
||||||
uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) {
|
|
||||||
// mask off anything inside quotes
|
|
||||||
structurals &= ~quote_mask;
|
|
||||||
// add the real quote bits back into our bit_mask as well, so we can
|
|
||||||
// quickly traverse the strings we've spent all this trouble gathering
|
|
||||||
structurals |= quote_bits;
|
|
||||||
// Now, establish "pseudo-structural characters". These are non-whitespace
|
|
||||||
// characters that are (a) outside quotes and (b) have a predecessor that's
|
|
||||||
// either whitespace or a structural character. This means that subsequent
|
|
||||||
// passes will get a chance to encounter the first character of every string
|
|
||||||
// of non-whitespace and, if we're parsing an atom like true/false/null or a
|
|
||||||
// number we can stop at the first whitespace or structural character
|
|
||||||
// following it.
|
|
||||||
|
|
||||||
// a qualified predecessor is something that can happen 1 position before a
|
|
||||||
// pseudo-structural character
|
|
||||||
uint64_t pseudo_pred = structurals | whitespace;
|
|
||||||
|
|
||||||
uint64_t shifted_pseudo_pred =
|
|
||||||
(pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
|
|
||||||
prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
|
|
||||||
uint64_t pseudo_structurals =
|
|
||||||
shifted_pseudo_pred & (~whitespace) & (~quote_mask);
|
|
||||||
structurals |= pseudo_structurals;
|
|
||||||
|
|
||||||
// now, we've used our close quotes all we need to. So let's switch them off
|
|
||||||
// they will be off in the quote mask and on in quote bits.
|
|
||||||
structurals &= ~(quote_bits & ~quote_mask);
|
|
||||||
return structurals;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <Architecture T = Architecture::NATIVE>
|
template <Architecture T = Architecture::NATIVE>
|
||||||
int find_structural_bits(const uint8_t *buf, size_t len,
|
int find_structural_bits(const char *buf, size_t len, simdjson::ParsedJson &pj) {
|
||||||
simdjson::ParsedJson &pj);
|
|
||||||
|
|
||||||
template <Architecture T = Architecture::NATIVE>
|
|
||||||
int find_structural_bits(const char *buf, size_t len,
|
|
||||||
simdjson::ParsedJson &pj) {
|
|
||||||
return find_structural_bits((const uint8_t *)buf, len, pj);
|
return find_structural_bits((const uint8_t *)buf, len, pj);
|
||||||
}
|
}
|
||||||
|
|
||||||
// flatten out values in 'bits' assuming that they are to have values of idx
|
}; // namespace simdjson
|
||||||
// plus their position in the bitvector, and store these indexes at
|
|
||||||
// base_ptr[base] incrementing base as we go
|
|
||||||
// will potentially store extra values beyond end of valid bits, so base_ptr
|
|
||||||
// needs to be large enough to handle this
|
|
||||||
template <Architecture T = Architecture::NATIVE>
|
|
||||||
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
|
|
||||||
uint32_t idx, uint64_t bits);
|
|
||||||
|
|
||||||
} // namespace simdjson
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,90 +0,0 @@
|
||||||
// This file provides the same function as
|
|
||||||
// stage1_find_marks_flatten_common.h, but uses Intel intrinsics.
|
|
||||||
// This should provide better performance on Visual Studio
|
|
||||||
// and other compilers that do a conservative optimization.
|
|
||||||
|
|
||||||
// Specifically, on x64 processors with BMI,
|
|
||||||
// x & (x - 1) should be mapped to
|
|
||||||
// the blsr instruction. By using the
|
|
||||||
// _blsr_u64 intrinsic, we
|
|
||||||
// ensure that this will happen.
|
|
||||||
/////////
|
|
||||||
|
|
||||||
#include "simdjson/common_defs.h"
|
|
||||||
#include "simdjson/portability.h"
|
|
||||||
|
|
||||||
#ifdef IS_X86_64
|
|
||||||
|
|
||||||
TARGET_HASWELL
|
|
||||||
namespace simdjson {
|
|
||||||
|
|
||||||
// flatten out values in 'bits' assuming that they are to have values of idx
|
|
||||||
// plus their position in the bitvector, and store these indexes at
|
|
||||||
// base_ptr[base] incrementing base as we go
|
|
||||||
// will potentially store extra values beyond end of valid bits, so base_ptr
|
|
||||||
// needs to be large enough to handle this
|
|
||||||
template<>
|
|
||||||
really_inline void flatten_bits<Architecture::HASWELL>(uint32_t *base_ptr, uint32_t &base,
|
|
||||||
uint32_t idx, uint64_t bits) {
|
|
||||||
// In some instances, the next branch is expensive because it is mispredicted.
|
|
||||||
// Unfortunately, in other cases,
|
|
||||||
// it helps tremendously.
|
|
||||||
if (bits == 0)
|
|
||||||
return;
|
|
||||||
uint32_t cnt = _mm_popcnt_u64(bits);
|
|
||||||
uint32_t next_base = base + cnt;
|
|
||||||
idx -= 64;
|
|
||||||
base_ptr += base;
|
|
||||||
{
|
|
||||||
base_ptr[0] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr[1] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr[2] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr[3] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr[4] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr[5] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr[6] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr[7] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr += 8;
|
|
||||||
}
|
|
||||||
// We hope that the next branch is easily predicted.
|
|
||||||
if (cnt > 8) {
|
|
||||||
base_ptr[0] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr[1] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr[2] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr[3] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr[4] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr[5] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr[6] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr[7] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr += 8;
|
|
||||||
}
|
|
||||||
if (cnt > 16) { // unlucky: we rarely get here
|
|
||||||
// since it means having one structural or pseudo-structural element
|
|
||||||
// every 4 characters (possible with inputs like "","","",...).
|
|
||||||
do {
|
|
||||||
base_ptr[0] = idx + trailing_zeroes(bits);
|
|
||||||
bits = _blsr_u64(bits);
|
|
||||||
base_ptr++;
|
|
||||||
} while (bits != 0);
|
|
||||||
}
|
|
||||||
base = next_base;
|
|
||||||
}
|
|
||||||
} // namespace simdjson
|
|
||||||
UNTARGET_REGION
|
|
||||||
#endif // IS_X86_64
|
|
|
@ -1,116 +0,0 @@
|
||||||
#ifndef SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H
|
|
||||||
#define SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H
|
|
||||||
|
|
||||||
#include "simdjson/simd_input_haswell.h"
|
|
||||||
#include "simdjson/simdutf8check_haswell.h"
|
|
||||||
#include "simdjson/stage1_find_marks.h"
|
|
||||||
|
|
||||||
#ifdef IS_X86_64
|
|
||||||
|
|
||||||
TARGET_HASWELL
|
|
||||||
namespace simdjson {
|
|
||||||
|
|
||||||
template <>
|
|
||||||
really_inline uint64_t
|
|
||||||
compute_quote_mask<Architecture::HASWELL>(uint64_t quote_bits) {
|
|
||||||
// There should be no such thing as a processor supporting avx2
|
|
||||||
// but not clmul.
|
|
||||||
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
|
||||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0));
|
|
||||||
return quote_mask;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
really_inline void find_whitespace_and_structurals<Architecture::HASWELL>(
|
|
||||||
simd_input<Architecture::HASWELL> in, uint64_t &whitespace,
|
|
||||||
uint64_t &structurals) {
|
|
||||||
#ifdef SIMDJSON_NAIVE_STRUCTURAL
|
|
||||||
// You should never need this naive approach, but it can be useful
|
|
||||||
// for research purposes
|
|
||||||
const __m256i mask_open_brace = _mm256_set1_epi8(0x7b);
|
|
||||||
__m256i struct_lo = _mm256_cmpeq_epi8(in.lo, mask_open_brace);
|
|
||||||
__m256i struct_hi = _mm256_cmpeq_epi8(in.hi, mask_open_brace);
|
|
||||||
const __m256i mask_close_brace = _mm256_set1_epi8(0x7d);
|
|
||||||
struct_lo =
|
|
||||||
_mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_close_brace));
|
|
||||||
struct_hi =
|
|
||||||
_mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_close_brace));
|
|
||||||
const __m256i mask_open_bracket = _mm256_set1_epi8(0x5b);
|
|
||||||
struct_lo =
|
|
||||||
_mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_open_bracket));
|
|
||||||
struct_hi =
|
|
||||||
_mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_open_bracket));
|
|
||||||
const __m256i mask_close_bracket = _mm256_set1_epi8(0x5d);
|
|
||||||
struct_lo =
|
|
||||||
_mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_close_bracket));
|
|
||||||
struct_hi =
|
|
||||||
_mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_close_bracket));
|
|
||||||
const __m256i mask_column = _mm256_set1_epi8(0x3a);
|
|
||||||
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_column));
|
|
||||||
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_column));
|
|
||||||
const __m256i mask_comma = _mm256_set1_epi8(0x2c);
|
|
||||||
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_comma));
|
|
||||||
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_comma));
|
|
||||||
uint64_t structural_res_0 =
|
|
||||||
static_cast<uint32_t>(_mm256_movemask_epi8(struct_lo));
|
|
||||||
uint64_t structural_res_1 = _mm256_movemask_epi8(struct_hi);
|
|
||||||
structurals = (structural_res_0 | (structural_res_1 << 32));
|
|
||||||
|
|
||||||
const __m256i mask_space = _mm256_set1_epi8(0x20);
|
|
||||||
__m256i space_lo = _mm256_cmpeq_epi8(in.lo, mask_space);
|
|
||||||
__m256i space_hi = _mm256_cmpeq_epi8(in.hi, mask_space);
|
|
||||||
const __m256i mask_linefeed = _mm256_set1_epi8(0x0a);
|
|
||||||
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_linefeed));
|
|
||||||
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_linefeed));
|
|
||||||
const __m256i mask_tab = _mm256_set1_epi8(0x09);
|
|
||||||
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_tab));
|
|
||||||
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_tab));
|
|
||||||
const __m256i mask_carriage = _mm256_set1_epi8(0x0d);
|
|
||||||
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_carriage));
|
|
||||||
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_carriage));
|
|
||||||
|
|
||||||
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(space_lo));
|
|
||||||
uint64_t ws_res_1 = _mm256_movemask_epi8(space_hi);
|
|
||||||
whitespace = (ws_res_0 | (ws_res_1 << 32));
|
|
||||||
// end of naive approach
|
|
||||||
|
|
||||||
#else // SIMDJSON_NAIVE_STRUCTURAL
|
|
||||||
// clang-format off
|
|
||||||
const __m256i structural_table =
|
|
||||||
_mm256_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123,
|
|
||||||
44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
|
|
||||||
const __m256i white_table = _mm256_setr_epi8(
|
|
||||||
32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100,
|
|
||||||
32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100);
|
|
||||||
// clang-format on
|
|
||||||
const __m256i struct_offset = _mm256_set1_epi8(0xd4u);
|
|
||||||
const __m256i struct_mask = _mm256_set1_epi8(32);
|
|
||||||
|
|
||||||
__m256i lo_white =
|
|
||||||
_mm256_cmpeq_epi8(in.lo, _mm256_shuffle_epi8(white_table, in.lo));
|
|
||||||
__m256i hi_white =
|
|
||||||
_mm256_cmpeq_epi8(in.hi, _mm256_shuffle_epi8(white_table, in.hi));
|
|
||||||
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(lo_white));
|
|
||||||
uint64_t ws_res_1 = _mm256_movemask_epi8(hi_white);
|
|
||||||
whitespace = (ws_res_0 | (ws_res_1 << 32));
|
|
||||||
__m256i lo_struct_r1 = _mm256_add_epi8(struct_offset, in.lo);
|
|
||||||
__m256i hi_struct_r1 = _mm256_add_epi8(struct_offset, in.hi);
|
|
||||||
__m256i lo_struct_r2 = _mm256_or_si256(in.lo, struct_mask);
|
|
||||||
__m256i hi_struct_r2 = _mm256_or_si256(in.hi, struct_mask);
|
|
||||||
__m256i lo_struct_r3 = _mm256_shuffle_epi8(structural_table, lo_struct_r1);
|
|
||||||
__m256i hi_struct_r3 = _mm256_shuffle_epi8(structural_table, hi_struct_r1);
|
|
||||||
__m256i lo_struct = _mm256_cmpeq_epi8(lo_struct_r2, lo_struct_r3);
|
|
||||||
__m256i hi_struct = _mm256_cmpeq_epi8(hi_struct_r2, hi_struct_r3);
|
|
||||||
|
|
||||||
uint64_t structural_res_0 =
|
|
||||||
static_cast<uint32_t>(_mm256_movemask_epi8(lo_struct));
|
|
||||||
uint64_t structural_res_1 = _mm256_movemask_epi8(hi_struct);
|
|
||||||
structurals = (structural_res_0 | (structural_res_1 << 32));
|
|
||||||
#endif // SIMDJSON_NAIVE_STRUCTURAL
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace simdjson
|
|
||||||
UNTARGET_REGION
|
|
||||||
|
|
||||||
#endif // IS_X86_64
|
|
||||||
#endif // SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H
|
|
|
@ -10,7 +10,6 @@
|
||||||
#include "simdjson/numberparsing.h"
|
#include "simdjson/numberparsing.h"
|
||||||
#include "simdjson/parsedjson.h"
|
#include "simdjson/parsedjson.h"
|
||||||
#include "simdjson/simdjson.h"
|
#include "simdjson/simdjson.h"
|
||||||
#include "simdjson/stringparsing.h"
|
|
||||||
|
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
void init_state_machine();
|
void init_state_machine();
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* auto-generated on Wed Aug 14 13:56:54 DST 2019. Do not edit! */
|
/* auto-generated on Sun Aug 18 15:06:50 DST 2019. Do not edit! */
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "simdjson.h"
|
#include "simdjson.h"
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -18,23 +18,55 @@ MESSAGE( STATUS "SIMDJSON_LIB_TYPE: " ${SIMDJSON_LIB_TYPE})
|
||||||
|
|
||||||
# Bring in include files
|
# Bring in include files
|
||||||
include(../include/CMakeLists.txt)
|
include(../include/CMakeLists.txt)
|
||||||
set(SIMDJSON_SRC
|
|
||||||
jsonioutil.cpp
|
|
||||||
jsonminifier.cpp
|
|
||||||
jsonparser.cpp
|
|
||||||
stage1_find_marks.cpp
|
|
||||||
stage2_build_tape.cpp
|
|
||||||
parsedjson.cpp
|
|
||||||
parsedjsoniterator.cpp
|
|
||||||
simdjson.cpp
|
|
||||||
${SIMDJSON_INCLUDE}
|
|
||||||
)
|
|
||||||
|
|
||||||
add_library(${SIMDJSON_LIB_NAME} ${SIMDJSON_LIB_TYPE} ${SIMDJSON_SRC})
|
set(SIMDJSON_SRC_DIR $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src>)
|
||||||
|
|
||||||
|
set(SIMDJSON_SRC
|
||||||
|
jsonioutil.cpp
|
||||||
|
jsonminifier.cpp
|
||||||
|
jsonparser.cpp
|
||||||
|
stage1_find_marks.cpp
|
||||||
|
stage2_build_tape.cpp
|
||||||
|
parsedjson.cpp
|
||||||
|
parsedjsoniterator.cpp
|
||||||
|
simdjson.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
# Load headers and sources
|
||||||
|
set(SIMDJSON_SRC_HEADERS
|
||||||
|
arm64/architecture.h
|
||||||
|
arm64/simd_input.h
|
||||||
|
arm64/simdutf8check.h
|
||||||
|
arm64/stage1_find_marks.h
|
||||||
|
arm64/stage2_build_tape.h
|
||||||
|
arm64/stringparsing.h
|
||||||
|
generic/stage1_find_marks_flatten.h
|
||||||
|
generic/stage1_find_marks.h
|
||||||
|
generic/stage2_build_tape.h
|
||||||
|
generic/stringparsing.h
|
||||||
|
haswell/architecture.h
|
||||||
|
haswell/simd_input.h
|
||||||
|
haswell/simdutf8check.h
|
||||||
|
haswell/stage1_find_marks.h
|
||||||
|
haswell/stage2_build_tape.h
|
||||||
|
haswell/stringparsing.h
|
||||||
|
westmere/architecture.h
|
||||||
|
westmere/simd_input.h
|
||||||
|
westmere/simdutf8check.h
|
||||||
|
westmere/stage1_find_marks.h
|
||||||
|
westmere/stage2_build_tape.h
|
||||||
|
westmere/stringparsing.h
|
||||||
|
simd_input.h
|
||||||
|
simdutf8check.h
|
||||||
|
stringparsing.h
|
||||||
|
)
|
||||||
|
|
||||||
|
add_library(${SIMDJSON_LIB_NAME} ${SIMDJSON_LIB_TYPE} ${SIMDJSON_SRC} ${SIMDJSON_INCLUDE} ${SIMDJSON_SRC_HEADERS})
|
||||||
|
|
||||||
target_include_directories(${SIMDJSON_LIB_NAME}
|
target_include_directories(${SIMDJSON_LIB_NAME}
|
||||||
PUBLIC
|
PUBLIC
|
||||||
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
|
$<BUILD_INTERFACE:${SIMDJSON_SRC_DIR}>
|
||||||
|
$<BUILD_INTERFACE:${SIMDJSON_INCLUDE_DIR}>
|
||||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
|
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
#ifndef SIMDJSON_ARM64_ARCHITECTURE_H
|
||||||
|
#define SIMDJSON_ARM64_ARCHITECTURE_H
|
||||||
|
|
||||||
|
#include "simdjson/portability.h"
|
||||||
|
|
||||||
|
#ifdef IS_ARM64
|
||||||
|
|
||||||
|
#include "simdjson/simdjson.h"
|
||||||
|
|
||||||
|
// Architecture tag for the ARM64 implementation. Code that lives in (or is
// textually included into) namespace simdjson::arm64 names it as ARCHITECTURE,
// e.g. simd_input<ARCHITECTURE>, instead of spelling out Architecture::ARM64.
namespace simdjson::arm64 {
|
||||||
|
|
||||||
|
static const Architecture ARCHITECTURE = Architecture::ARM64;
|
||||||
|
|
||||||
|
} // namespace simdjson::arm64
|
||||||
|
|
||||||
|
#endif // IS_ARM64
|
||||||
|
|
||||||
|
#endif // SIMDJSON_ARM64_ARCHITECTURE_H
|
|
@ -1,9 +1,10 @@
|
||||||
#ifndef SIMDJSON_SIMD_INPUT_ARM64_H
|
#ifndef SIMDJSON_ARM64_SIMD_INPUT_H
|
||||||
#define SIMDJSON_SIMD_INPUT_ARM64_H
|
#define SIMDJSON_ARM64_SIMD_INPUT_H
|
||||||
|
|
||||||
#include "simdjson/simd_input.h"
|
#include "../simd_input.h"
|
||||||
|
|
||||||
#ifdef IS_ARM64
|
#ifdef IS_ARM64
|
||||||
|
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
|
|
||||||
really_inline uint16_t neon_movemask(uint8x16_t input) {
|
really_inline uint16_t neon_movemask(uint8x16_t input) {
|
||||||
|
@ -68,4 +69,4 @@ struct simd_input<Architecture::ARM64> {
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
#endif // IS_ARM64
|
#endif // IS_ARM64
|
||||||
#endif // SIMDJSON_SIMD_INPUT_ARM64_H
|
#endif // SIMDJSON_ARM64_SIMD_INPUT_H
|
|
@ -1,13 +1,13 @@
|
||||||
// From https://github.com/cyb70289/utf8/blob/master/lemire-neon.c
|
// From https://github.com/cyb70289/utf8/blob/master/lemire-neon.c
|
||||||
// Adapted from https://github.com/lemire/fastvalidate-utf-8
|
// Adapted from https://github.com/lemire/fastvalidate-utf-8
|
||||||
|
|
||||||
#ifndef SIMDJSON_SIMDUTF8CHECK_ARM64_H
|
#ifndef SIMDJSON_ARM64_SIMDUTF8CHECK_H
|
||||||
#define SIMDJSON_SIMDUTF8CHECK_ARM64_H
|
#define SIMDJSON_ARM64_SIMDUTF8CHECK_H
|
||||||
|
|
||||||
#if defined(_ARM_NEON) || defined(__aarch64__) || \
|
#if defined(_ARM_NEON) || defined(__aarch64__) || \
|
||||||
(defined(_MSC_VER) && defined(_M_ARM64))
|
(defined(_MSC_VER) && defined(_M_ARM64))
|
||||||
|
|
||||||
#include "simdjson/simdutf8check.h"
|
#include "../simdutf8check.h"
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
#include <cinttypes>
|
#include <cinttypes>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
@ -31,7 +31,7 @@
|
||||||
* U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
|
* U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
namespace simdjson {
|
namespace simdjson::arm64 {
|
||||||
|
|
||||||
// all byte values must be no larger than 0xF4
|
// all byte values must be no larger than 0xF4
|
||||||
static inline void check_smaller_than_0xF4(int8x16_t current_bytes,
|
static inline void check_smaller_than_0xF4(int8x16_t current_bytes,
|
||||||
|
@ -191,6 +191,12 @@ really_inline bool check_ascii_neon(simd_input<Architecture::ARM64> in) {
|
||||||
return vget_lane_u64(result, 0) == 0;
|
return vget_lane_u64(result, 0) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace simdjson::arm64
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
|
||||||
|
using namespace simdjson::arm64;
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct utf8_checker<Architecture::ARM64> {
|
struct utf8_checker<Architecture::ARM64> {
|
||||||
int8x16_t has_error{};
|
int8x16_t has_error{};
|
|
@ -1,16 +1,19 @@
|
||||||
#ifndef SIMDJSON_STAGE1_FIND_MARKS_ARM64_H
|
#ifndef SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
|
||||||
#define SIMDJSON_STAGE1_FIND_MARKS_ARM64_H
|
#define SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
|
||||||
|
|
||||||
#include "simdjson/simd_input_arm64.h"
|
#include "simdjson/portability.h"
|
||||||
#include "simdjson/simdutf8check_arm64.h"
|
|
||||||
#include "simdjson/stage1_find_marks.h"
|
|
||||||
|
|
||||||
#ifdef IS_ARM64
|
#ifdef IS_ARM64
|
||||||
namespace simdjson {
|
|
||||||
|
|
||||||
template <>
|
#include "arm64/architecture.h"
|
||||||
really_inline uint64_t
|
#include "arm64/simd_input.h"
|
||||||
compute_quote_mask<Architecture::ARM64>(uint64_t quote_bits) {
|
#include "arm64/simdutf8check.h"
|
||||||
|
#include "simdjson/stage1_find_marks.h"
|
||||||
|
|
||||||
|
namespace simdjson::arm64 {
|
||||||
|
|
||||||
|
static really_inline uint64_t compute_quote_mask(uint64_t quote_bits) {
|
||||||
|
|
||||||
#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension
|
#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension
|
||||||
return vmull_p64(-1ULL, quote_bits);
|
return vmull_p64(-1ULL, quote_bits);
|
||||||
#else
|
#else
|
||||||
|
@ -18,9 +21,8 @@ compute_quote_mask<Architecture::ARM64>(uint64_t quote_bits) {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
static really_inline void find_whitespace_and_structurals(
|
||||||
really_inline void find_whitespace_and_structurals<Architecture::ARM64>(
|
simd_input<ARCHITECTURE> in, uint64_t &whitespace,
|
||||||
simd_input<Architecture::ARM64> in, uint64_t &whitespace,
|
|
||||||
uint64_t &structurals) {
|
uint64_t &structurals) {
|
||||||
const uint8x16_t low_nibble_mask =
|
const uint8x16_t low_nibble_mask =
|
||||||
(uint8x16_t){16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0};
|
(uint8x16_t){16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0};
|
||||||
|
@ -66,7 +68,20 @@ really_inline void find_whitespace_and_structurals<Architecture::ARM64>(
|
||||||
uint8x16_t tmp_ws_3 = vtstq_u8(v_3, whitespace_shufti_mask);
|
uint8x16_t tmp_ws_3 = vtstq_u8(v_3, whitespace_shufti_mask);
|
||||||
whitespace = neon_movemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3);
|
whitespace = neon_movemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#include "generic/stage1_find_marks_flatten.h"
|
||||||
|
#include "generic/stage1_find_marks.h"
|
||||||
|
|
||||||
|
} // namespace simdjson::arm64
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
|
||||||
|
// Explicit specialization of find_structural_bits for Architecture::ARM64.
// It only forwards buf, len and pj, unchanged, to the find_structural_bits
// defined in namespace simdjson::arm64 and returns that function's int result.
template <>
|
||||||
|
int find_structural_bits<Architecture::ARM64>(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
|
||||||
|
return arm64::find_structural_bits(buf, len, pj);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
#endif // IS_ARM64
|
#endif // IS_ARM64
|
||||||
#endif // SIMDJSON_STAGE1_FIND_MARKS_ARM64_H
|
#endif // SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
|
|
@ -0,0 +1,30 @@
|
||||||
|
#ifndef SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
|
||||||
|
#define SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
|
||||||
|
|
||||||
|
#include "simdjson/portability.h"
|
||||||
|
|
||||||
|
#ifdef IS_ARM64
|
||||||
|
|
||||||
|
#include "simdjson/stage2_build_tape.h"
|
||||||
|
#include "arm64/architecture.h"
|
||||||
|
#include "arm64/stringparsing.h"
|
||||||
|
|
||||||
|
namespace simdjson::arm64 {
|
||||||
|
|
||||||
|
#include "generic/stage2_build_tape.h"
|
||||||
|
|
||||||
|
} // namespace simdjson::arm64
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
|
||||||
|
// Explicit specialization of unified_machine for Architecture::ARM64.
// It only forwards buf, len and pj, unchanged, to the unified_machine defined
// in namespace simdjson::arm64 and returns that function's int result.
template <>
|
||||||
|
WARN_UNUSED int
|
||||||
|
unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||||
|
return arm64::unified_machine(buf, len, pj);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace simdjson
|
||||||
|
|
||||||
|
#endif // IS_ARM64
|
||||||
|
|
||||||
|
#endif // SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
|
|
@ -1,14 +1,15 @@
|
||||||
#ifndef SIMDJSON_STRINGPARSING_ARM64_H
|
#ifndef SIMDJSON_ARM64_STRINGPARSING_H
|
||||||
#define SIMDJSON_STRINGPARSING_ARM64_H
|
#define SIMDJSON_ARM64_STRINGPARSING_H
|
||||||
|
|
||||||
#include "simdjson/stringparsing.h"
|
#include "../stringparsing.h"
|
||||||
|
|
||||||
#ifdef IS_ARM64
|
#ifdef IS_ARM64
|
||||||
namespace simdjson {
|
|
||||||
template <>
|
#include "arm64/architecture.h"
|
||||||
really_inline parse_string_helper
|
|
||||||
find_bs_bits_and_quote_bits<Architecture::ARM64>(const uint8_t *src,
|
namespace simdjson::arm64 {
|
||||||
uint8_t *dst) {
|
|
||||||
|
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
|
||||||
// this can read up to 31 bytes beyond the buffer size, but we require
|
// this can read up to 31 bytes beyond the buffer size, but we require
|
||||||
// SIMDJSON_PADDING of padding
|
// SIMDJSON_PADDING of padding
|
||||||
static_assert(2 * sizeof(uint8x16_t) - 1 <= SIMDJSON_PADDING);
|
static_assert(2 * sizeof(uint8x16_t) - 1 <= SIMDJSON_PADDING);
|
||||||
|
@ -39,15 +40,13 @@ find_bs_bits_and_quote_bits<Architecture::ARM64>(const uint8_t *src,
|
||||||
vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 0), // bs_bits
|
vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 0), // bs_bits
|
||||||
vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 1) // quote_bits
|
vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 1) // quote_bits
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace simdjson
|
#include "generic/stringparsing.h"
|
||||||
|
|
||||||
#define TARGETED_ARCHITECTURE Architecture::ARM64
|
}
|
||||||
#define TARGETED_REGION TARGET_ARM64
|
// namespace simdjson::arm64
|
||||||
#include "simdjson/stringparsing_common.h"
|
|
||||||
#undef TARGETED_ARCHITECTURE
|
|
||||||
#undef TARGETED_REGION
|
|
||||||
|
|
||||||
#endif // IS_ARM64
|
#endif // IS_ARM64
|
||||||
#endif
|
#endif
|
|
@ -1,14 +1,8 @@
|
||||||
// This file contains the common code every implementation uses in stage1
|
// This file contains the common code every implementation uses in stage1
|
||||||
// It is intended to be included multiple times and compiled multiple times
|
// It is intended to be included multiple times and compiled multiple times
|
||||||
// We assume the file in which it is include already includes
|
// We assume the file in which it is included already includes
|
||||||
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
|
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
|
||||||
|
|
||||||
#ifdef TARGETED_ARCHITECTURE
|
|
||||||
#ifdef TARGETED_REGION
|
|
||||||
|
|
||||||
TARGETED_REGION
|
|
||||||
namespace simdjson {
|
|
||||||
|
|
||||||
// return a bitvector indicating where we have characters that end an odd-length
|
// return a bitvector indicating where we have characters that end an odd-length
|
||||||
// sequence of backslashes (and thus change the behavior of the next character
|
// sequence of backslashes (and thus change the behavior of the next character
|
||||||
// to follow). An even-length sequence of backslashes, and, for that matter, the
|
// to follow). An even-length sequence of backslashes, and, for that matter, the
|
||||||
|
@ -18,9 +12,8 @@ namespace simdjson {
|
||||||
// indicate whether we end an iteration on an odd-length sequence of
|
// indicate whether we end an iteration on an odd-length sequence of
|
||||||
// backslashes, which modifies our subsequent search for odd-length
|
// backslashes, which modifies our subsequent search for odd-length
|
||||||
// sequences of backslashes in an obvious way.
|
// sequences of backslashes in an obvious way.
|
||||||
template <>
|
really_inline uint64_t find_odd_backslash_sequences(
|
||||||
really_inline uint64_t find_odd_backslash_sequences<TARGETED_ARCHITECTURE>(
|
simd_input<ARCHITECTURE> in,
|
||||||
simd_input<TARGETED_ARCHITECTURE> in,
|
|
||||||
uint64_t &prev_iter_ends_odd_backslash) {
|
uint64_t &prev_iter_ends_odd_backslash) {
|
||||||
const uint64_t even_bits = 0x5555555555555555ULL;
|
const uint64_t even_bits = 0x5555555555555555ULL;
|
||||||
const uint64_t odd_bits = ~even_bits;
|
const uint64_t odd_bits = ~even_bits;
|
||||||
|
@ -66,14 +59,13 @@ really_inline uint64_t find_odd_backslash_sequences<TARGETED_ARCHITECTURE>(
|
||||||
// Note that we don't do any error checking to see if we have backslash
|
// Note that we don't do any error checking to see if we have backslash
|
||||||
// sequences outside quotes; these
|
// sequences outside quotes; these
|
||||||
// backslash sequences (of any length) will be detected elsewhere.
|
// backslash sequences (of any length) will be detected elsewhere.
|
||||||
template <>
|
really_inline uint64_t find_quote_mask_and_bits(
|
||||||
really_inline uint64_t find_quote_mask_and_bits<TARGETED_ARCHITECTURE>(
|
simd_input<ARCHITECTURE> in, uint64_t odd_ends,
|
||||||
simd_input<TARGETED_ARCHITECTURE> in, uint64_t odd_ends,
|
|
||||||
uint64_t &prev_iter_inside_quote, uint64_t "e_bits,
|
uint64_t &prev_iter_inside_quote, uint64_t "e_bits,
|
||||||
uint64_t &error_mask) {
|
uint64_t &error_mask) {
|
||||||
quote_bits = in.eq('"');
|
quote_bits = in.eq('"');
|
||||||
quote_bits = quote_bits & ~odd_ends;
|
quote_bits = quote_bits & ~odd_ends;
|
||||||
uint64_t quote_mask = compute_quote_mask<TARGETED_ARCHITECTURE>(quote_bits);
|
uint64_t quote_mask = compute_quote_mask(quote_bits);
|
||||||
quote_mask ^= prev_iter_inside_quote;
|
quote_mask ^= prev_iter_inside_quote;
|
||||||
/* All Unicode characters may be placed within the
|
/* All Unicode characters may be placed within the
|
||||||
* quotation marks, except for the characters that MUST be escaped:
|
* quotation marks, except for the characters that MUST be escaped:
|
||||||
|
@ -90,33 +82,65 @@ really_inline uint64_t find_quote_mask_and_bits<TARGETED_ARCHITECTURE>(
|
||||||
return quote_mask;
|
return quote_mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Turns the raw structural mask of one block into the final structural mask.
//
// structurals, whitespace, quote_mask and quote_bits are 64-bit masks for the
// current block (presumably one bit per byte of a 64-byte chunk -- confirm
// against the caller). prev_iter_ends_pseudo_pred is state carried between
// calls: it is OR-ed in as bit 0 of the shifted predecessor mask, then
// overwritten with bit 63 of this block's predecessor mask.
//
// Steps, in order: clear structurals covered by quote_mask, add quote_bits,
// add the pseudo-structural positions, and finally clear the bits that are set
// in quote_bits but not in quote_mask. Returns the resulting mask.
really_inline uint64_t finalize_structurals(
|
||||||
|
uint64_t structurals, uint64_t whitespace, uint64_t quote_mask,
|
||||||
|
uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) {
|
||||||
|
// mask off anything inside quotes
|
||||||
|
structurals &= ~quote_mask;
|
||||||
|
// add the real quote bits back into our bit_mask as well, so we can
|
||||||
|
// quickly traverse the strings we've spent all this trouble gathering
|
||||||
|
structurals |= quote_bits;
|
||||||
|
// Now, establish "pseudo-structural characters". These are non-whitespace
|
||||||
|
// characters that are (a) outside quotes and (b) have a predecessor that's
|
||||||
|
// either whitespace or a structural character. This means that subsequent
|
||||||
|
// passes will get a chance to encounter the first character of every string
|
||||||
|
// of non-whitespace and, if we're parsing an atom like true/false/null or a
|
||||||
|
// number we can stop at the first whitespace or structural character
|
||||||
|
// following it.
|
||||||
|
|
||||||
|
// a qualified predecessor is something that can happen 1 position before a
|
||||||
|
// pseudo-structural character
|
||||||
|
uint64_t pseudo_pred = structurals | whitespace;
|
||||||
|
|
||||||
|
uint64_t shifted_pseudo_pred =
|
||||||
|
(pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
|
||||||
|
prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
|
||||||
|
uint64_t pseudo_structurals =
|
||||||
|
shifted_pseudo_pred & (~whitespace) & (~quote_mask);
|
||||||
|
structurals |= pseudo_structurals;
|
||||||
|
|
||||||
|
// now, we've used our close quotes all we need to. So let's switch them off
|
||||||
|
// they will be off in the quote mask and on in quote bits.
|
||||||
|
structurals &= ~(quote_bits & ~quote_mask);
|
||||||
|
return structurals;
|
||||||
|
}
|
||||||
|
|
||||||
// Find structural bits in a 64-byte chunk.
|
// Find structural bits in a 64-byte chunk.
|
||||||
really_inline void find_structural_bits_64(
|
really_inline void find_structural_bits_64(
|
||||||
const uint8_t *buf, size_t idx, uint32_t *base_ptr, uint32_t &base,
|
const uint8_t *buf, size_t idx, uint32_t *base_ptr, uint32_t &base,
|
||||||
uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote,
|
uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote,
|
||||||
uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals,
|
uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals,
|
||||||
uint64_t &error_mask,
|
uint64_t &error_mask,
|
||||||
utf8_checker<TARGETED_ARCHITECTURE> &utf8_state) {
|
utf8_checker<ARCHITECTURE> &utf8_state) {
|
||||||
simd_input<TARGETED_ARCHITECTURE> in(buf);
|
simd_input<ARCHITECTURE> in(buf);
|
||||||
utf8_state.check_next_input(in);
|
utf8_state.check_next_input(in);
|
||||||
/* detect odd sequences of backslashes */
|
/* detect odd sequences of backslashes */
|
||||||
uint64_t odd_ends = find_odd_backslash_sequences<TARGETED_ARCHITECTURE>(
|
uint64_t odd_ends = find_odd_backslash_sequences(
|
||||||
in, prev_iter_ends_odd_backslash);
|
in, prev_iter_ends_odd_backslash);
|
||||||
|
|
||||||
/* detect insides of quote pairs ("quote_mask") and also our quote_bits
|
/* detect insides of quote pairs ("quote_mask") and also our quote_bits
|
||||||
* themselves */
|
* themselves */
|
||||||
uint64_t quote_bits;
|
uint64_t quote_bits;
|
||||||
uint64_t quote_mask = find_quote_mask_and_bits<TARGETED_ARCHITECTURE>(
|
uint64_t quote_mask = find_quote_mask_and_bits(
|
||||||
in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);
|
in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);
|
||||||
|
|
||||||
/* take the previous iteration's structural bits, not our current
|
/* take the previous iteration's structural bits, not our current
|
||||||
* iteration,
|
* iteration,
|
||||||
* and flatten */
|
* and flatten */
|
||||||
flatten_bits<TARGETED_ARCHITECTURE>(base_ptr, base, idx, structurals);
|
flatten_bits(base_ptr, base, idx, structurals);
|
||||||
|
|
||||||
uint64_t whitespace;
|
uint64_t whitespace;
|
||||||
find_whitespace_and_structurals<TARGETED_ARCHITECTURE>(in, whitespace,
|
find_whitespace_and_structurals(in, whitespace, structurals);
|
||||||
structurals);
|
|
||||||
|
|
||||||
/* fixup structurals to reflect quotes and add pseudo-structural
|
/* fixup structurals to reflect quotes and add pseudo-structural
|
||||||
* characters */
|
* characters */
|
||||||
|
@ -124,9 +148,7 @@ really_inline void find_structural_bits_64(
|
||||||
quote_bits, prev_iter_ends_pseudo_pred);
|
quote_bits, prev_iter_ends_pseudo_pred);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
|
||||||
int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
|
|
||||||
ParsedJson &pj) {
|
|
||||||
if (len > pj.byte_capacity) {
|
if (len > pj.byte_capacity) {
|
||||||
std::cerr << "Your ParsedJson object only supports documents up to "
|
std::cerr << "Your ParsedJson object only supports documents up to "
|
||||||
<< pj.byte_capacity << " bytes but you are trying to process "
|
<< pj.byte_capacity << " bytes but you are trying to process "
|
||||||
|
@ -135,7 +157,7 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
|
||||||
}
|
}
|
||||||
uint32_t *base_ptr = pj.structural_indexes;
|
uint32_t *base_ptr = pj.structural_indexes;
|
||||||
uint32_t base = 0;
|
uint32_t base = 0;
|
||||||
utf8_checker<TARGETED_ARCHITECTURE> utf8_state;
|
utf8_checker<ARCHITECTURE> utf8_state;
|
||||||
|
|
||||||
/* we have padded the input out to 64 byte multiple with the remainder
|
/* we have padded the input out to 64 byte multiple with the remainder
|
||||||
* being zeros persistent state across loop does the last iteration end
|
* being zeros persistent state across loop does the last iteration end
|
||||||
|
@ -194,7 +216,7 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
|
||||||
|
|
||||||
/* finally, flatten out the remaining structurals from the last iteration
|
/* finally, flatten out the remaining structurals from the last iteration
|
||||||
*/
|
*/
|
||||||
flatten_bits<TARGETED_ARCHITECTURE>(base_ptr, base, idx, structurals);
|
flatten_bits(base_ptr, base, idx, structurals);
|
||||||
|
|
||||||
pj.n_structural_indexes = base;
|
pj.n_structural_indexes = base;
|
||||||
/* a valid JSON file cannot have zero structural indexes - we should have
|
/* a valid JSON file cannot have zero structural indexes - we should have
|
||||||
|
@ -217,13 +239,3 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
|
||||||
}
|
}
|
||||||
return utf8_state.errors();
|
return utf8_state.errors();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace simdjson
|
|
||||||
UNTARGET_REGION
|
|
||||||
|
|
||||||
#else
|
|
||||||
#error TARGETED_REGION must be specified before including.
|
|
||||||
#endif // TARGETED_REGION
|
|
||||||
#else
|
|
||||||
#error TARGETED_ARCHITECTURE must be specified before including.
|
|
||||||
#endif // TARGETED_ARCHITECTURE
|
|
|
@ -3,20 +3,12 @@
|
||||||
// We assume the file in which it is included already includes
|
// We assume the file in which it is included already includes
|
||||||
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
|
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
|
||||||
|
|
||||||
#ifdef TARGETED_ARCHITECTURE
|
|
||||||
#ifdef TARGETED_REGION
|
|
||||||
|
|
||||||
TARGETED_REGION
|
|
||||||
namespace simdjson {
|
|
||||||
|
|
||||||
#ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking
|
#ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking
|
||||||
//
|
|
||||||
// This is just a naive implementation. It should normally be
|
// This is just a naive implementation. It should normally be
|
||||||
// disabled, but can be used for research purposes to compare
|
// disabled, but can be used for research purposes to compare
|
||||||
// against our optimized version.
|
// against our optimized version.
|
||||||
template <>
|
static really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
|
||||||
really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint32_t &base,
|
|
||||||
uint32_t idx, uint64_t bits) {
|
|
||||||
uint32_t *out_ptr = base_ptr + base;
|
uint32_t *out_ptr = base_ptr + base;
|
||||||
idx -= 64;
|
idx -= 64;
|
||||||
while (bits != 0) {
|
while (bits != 0) {
|
||||||
|
@ -27,15 +19,14 @@ really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint3
|
||||||
base = (out_ptr - base_ptr);
|
base = (out_ptr - base_ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else // SIMDJSON_NAIVE_FLATTEN
|
||||||
|
|
||||||
// flatten out values in 'bits' assuming that they are to have values of idx
|
// flatten out values in 'bits' assuming that they are to have values of idx
|
||||||
// plus their position in the bitvector, and store these indexes at
|
// plus their position in the bitvector, and store these indexes at
|
||||||
// base_ptr[base] incrementing base as we go
|
// base_ptr[base] incrementing base as we go
|
||||||
// will potentially store extra values beyond end of valid bits, so base_ptr
|
// will potentially store extra values beyond end of valid bits, so base_ptr
|
||||||
// needs to be large enough to handle this
|
// needs to be large enough to handle this
|
||||||
template<>
|
static really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
|
||||||
really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint32_t &base,
|
|
||||||
uint32_t idx, uint64_t bits) {
|
|
||||||
// In some instances, the next branch is expensive because it is mispredicted.
|
// In some instances, the next branch is expensive because it is mispredicted.
|
||||||
// Unfortunately, in other cases,
|
// Unfortunately, in other cases,
|
||||||
// it helps tremendously.
|
// it helps tremendously.
|
||||||
|
@ -96,13 +87,3 @@ really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint3
|
||||||
base = next_base;
|
base = next_base;
|
||||||
}
|
}
|
||||||
#endif // SIMDJSON_NAIVE_FLATTEN
|
#endif // SIMDJSON_NAIVE_FLATTEN
|
||||||
|
|
||||||
} // namespace simdjson
|
|
||||||
UNTARGET_REGION
|
|
||||||
|
|
||||||
#else
|
|
||||||
#error TARGETED_REGION must be specified before including.
|
|
||||||
#endif // TARGETED_REGION
|
|
||||||
#else
|
|
||||||
#error TARGETED_ARCHITECTURE must be specified before including.
|
|
||||||
#endif // TARGETED_ARCHITECTURE
|
|
|
@ -3,12 +3,6 @@
|
||||||
// We assume the file in which it is included already includes
|
// We assume the file in which it is included already includes
|
||||||
// "simdjson/stage2_build_tape.h" (this simplifies amalgamation)
|
// "simdjson/stage2_build_tape.h" (this simplifies amalgamation)
|
||||||
|
|
||||||
#ifdef TARGETED_ARCHITECTURE
|
|
||||||
#ifdef TARGETED_REGION
|
|
||||||
|
|
||||||
TARGETED_REGION
|
|
||||||
namespace simdjson {
|
|
||||||
|
|
||||||
// this macro reads the next structural character, updating idx, i and c.
|
// this macro reads the next structural character, updating idx, i and c.
|
||||||
#define UPDATE_CHAR() \
|
#define UPDATE_CHAR() \
|
||||||
{ \
|
{ \
|
||||||
|
@ -41,10 +35,8 @@ namespace simdjson {
|
||||||
* The JSON is parsed to a tape, see the accompanying tape.md file
|
* The JSON is parsed to a tape, see the accompanying tape.md file
|
||||||
* for documentation.
|
* for documentation.
|
||||||
***********/
|
***********/
|
||||||
template <>
|
|
||||||
WARN_UNUSED int
|
WARN_UNUSED int
|
||||||
unified_machine<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
|
unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||||
ParsedJson &pj) {
|
|
||||||
uint32_t i = 0; /* index of the structural character (0,1,2,3...) */
|
uint32_t i = 0; /* index of the structural character (0,1,2,3...) */
|
||||||
uint32_t idx; /* location of the structural character in the input (buf) */
|
uint32_t idx; /* location of the structural character in the input (buf) */
|
||||||
uint8_t c; /* used to track the (structural) character we are looking at,
|
uint8_t c; /* used to track the (structural) character we are looking at,
|
||||||
|
@ -100,7 +92,7 @@ unified_machine<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
|
||||||
* https://tools.ietf.org/html/rfc8259
|
* https://tools.ietf.org/html/rfc8259
|
||||||
* #ifdef SIMDJSON_ALLOWANYTHINGINROOT */
|
* #ifdef SIMDJSON_ALLOWANYTHINGINROOT */
|
||||||
case '"': {
|
case '"': {
|
||||||
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
|
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -229,7 +221,7 @@ object_begin:
|
||||||
UPDATE_CHAR();
|
UPDATE_CHAR();
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '"': {
|
case '"': {
|
||||||
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
|
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
goto object_key_state;
|
goto object_key_state;
|
||||||
|
@ -248,7 +240,7 @@ object_key_state:
|
||||||
UPDATE_CHAR();
|
UPDATE_CHAR();
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '"': {
|
case '"': {
|
||||||
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
|
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -333,7 +325,7 @@ object_continue:
|
||||||
if (c != '"') {
|
if (c != '"') {
|
||||||
goto fail;
|
goto fail;
|
||||||
} else {
|
} else {
|
||||||
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
|
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
goto object_key_state;
|
goto object_key_state;
|
||||||
|
@ -367,7 +359,7 @@ main_array_switch:
|
||||||
* on paths that can accept a close square brace (post-, and at start) */
|
* on paths that can accept a close square brace (post-, and at start) */
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '"': {
|
case '"': {
|
||||||
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
|
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -523,13 +515,3 @@ fail:
|
||||||
pj.error_code = simdjson::TAPE_ERROR;
|
pj.error_code = simdjson::TAPE_ERROR;
|
||||||
return pj.error_code;
|
return pj.error_code;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace simdjson
|
|
||||||
UNTARGET_REGION
|
|
||||||
|
|
||||||
#else
|
|
||||||
#error TARGETED_REGION must be specified before including.
|
|
||||||
#endif // TARGETED_REGION
|
|
||||||
#else
|
|
||||||
#error TARGETED_ARCHITECTURE must be specified before including.
|
|
||||||
#endif // TARGETED_ARCHITECTURE
|
|
|
@ -1,28 +1,19 @@
|
||||||
// This file contains the common code every implementation uses
|
// This file contains the common code every implementation uses
|
||||||
// It is intended to be included multiple times and compiled multiple times
|
// It is intended to be included multiple times and compiled multiple times
|
||||||
// We assume the file in which it is included already includes
|
// We assume the file in which it is included already includes
|
||||||
// "simdjson/stringparsing.h" (this simplifies amalgamation)
|
// "stringparsing.h" (this simplifies amalgamation)
|
||||||
|
|
||||||
#ifdef TARGETED_ARCHITECTURE
|
WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf,
|
||||||
#ifdef TARGETED_REGION
|
UNUSED size_t len, ParsedJson &pj,
|
||||||
|
UNUSED const uint32_t depth,
|
||||||
TARGETED_REGION
|
UNUSED uint32_t offset) {
|
||||||
namespace simdjson {
|
|
||||||
|
|
||||||
template <>
|
|
||||||
WARN_UNUSED
|
|
||||||
really_inline bool
|
|
||||||
parse_string<TARGETED_ARCHITECTURE>(UNUSED const uint8_t *buf,
|
|
||||||
UNUSED size_t len, ParsedJson &pj,
|
|
||||||
UNUSED const uint32_t depth,
|
|
||||||
UNUSED uint32_t offset) {
|
|
||||||
pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"');
|
pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"');
|
||||||
const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */
|
const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */
|
||||||
uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t);
|
uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t);
|
||||||
const uint8_t *const start_of_string = dst;
|
const uint8_t *const start_of_string = dst;
|
||||||
while (1) {
|
while (1) {
|
||||||
parse_string_helper helper =
|
parse_string_helper helper =
|
||||||
find_bs_bits_and_quote_bits<TARGETED_ARCHITECTURE>(src, dst);
|
find_bs_bits_and_quote_bits(src, dst);
|
||||||
if (((helper.bs_bits - 1) & helper.quote_bits) != 0) {
|
if (((helper.bs_bits - 1) & helper.quote_bits) != 0) {
|
||||||
/* we encountered quotes first. Move dst to point to quotes and exit
|
/* we encountered quotes first. Move dst to point to quotes and exit
|
||||||
*/
|
*/
|
||||||
|
@ -80,7 +71,7 @@ WARN_UNUSED
|
||||||
} else {
|
} else {
|
||||||
/* they are the same. Since they can't co-occur, it means we
|
/* they are the same. Since they can't co-occur, it means we
|
||||||
* encountered neither. */
|
* encountered neither. */
|
||||||
if constexpr (TARGETED_ARCHITECTURE == Architecture::WESTMERE) {
|
if constexpr (ARCHITECTURE == Architecture::WESTMERE) {
|
||||||
src += 16;
|
src += 16;
|
||||||
dst += 16;
|
dst += 16;
|
||||||
} else {
|
} else {
|
||||||
|
@ -92,13 +83,3 @@ WARN_UNUSED
|
||||||
/* can't be reached */
|
/* can't be reached */
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace simdjson
|
|
||||||
UNTARGET_REGION
|
|
||||||
|
|
||||||
#else
|
|
||||||
#error TARGETED_REGION must be specified before including.
|
|
||||||
#endif // TARGETED_REGION
|
|
||||||
#else
|
|
||||||
#error TARGETED_ARCHITECTURE must be specified before including.
|
|
||||||
#endif // TARGETED_ARCHITECTURE
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
#ifndef SIMDJSON_HASWELL_ARCHITECTURE_H
|
||||||
|
#define SIMDJSON_HASWELL_ARCHITECTURE_H
|
||||||
|
|
||||||
|
#include "simdjson/portability.h"
|
||||||
|
|
||||||
|
#ifdef IS_X86_64
|
||||||
|
|
||||||
|
#include "simdjson/simdjson.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace simdjson::haswell {
|
||||||
|
|
||||||
|
static const Architecture ARCHITECTURE = Architecture::HASWELL;
|
||||||
|
|
||||||
|
} // namespace simdjson::haswell
|
||||||
|
|
||||||
|
|
||||||
|
#endif // IS_X86_64
|
||||||
|
|
||||||
|
#endif // SIMDJSON_HASWELL_ARCHITECTURE_H
|
|
@ -1,7 +1,7 @@
|
||||||
#ifndef SIMDJSON_SIMD_INPUT_HASWELL_H
|
#ifndef SIMDJSON_HASWELL_SIMD_INPUT_H
|
||||||
#define SIMDJSON_SIMD_INPUT_HASWELL_H
|
#define SIMDJSON_HASWELL_SIMD_INPUT_H
|
||||||
|
|
||||||
#include "simdjson/simd_input.h"
|
#include "../simd_input.h"
|
||||||
|
|
||||||
#ifdef IS_X86_64
|
#ifdef IS_X86_64
|
||||||
|
|
||||||
|
@ -42,4 +42,4 @@ struct simd_input<Architecture::HASWELL> {
|
||||||
UNTARGET_REGION
|
UNTARGET_REGION
|
||||||
|
|
||||||
#endif // IS_X86_64
|
#endif // IS_X86_64
|
||||||
#endif // SIMDJSON_SIMD_INPUT_HASWELL_H
|
#endif // SIMDJSON_HASWELL_SIMD_INPUT_H
|
|
@ -1,8 +1,8 @@
|
||||||
#ifndef SIMDJSON_SIMDUTF8CHECK_HASWELL_H
|
#ifndef SIMDJSON_HASWELL_SIMDUTF8CHECK_H
|
||||||
#define SIMDJSON_SIMDUTF8CHECK_HASWELL_H
|
#define SIMDJSON_HASWELL_SIMDUTF8CHECK_H
|
||||||
|
|
||||||
#include "simdjson/portability.h"
|
#include "simdjson/portability.h"
|
||||||
#include "simdjson/simdutf8check.h"
|
#include "../simdutf8check.h"
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
@ -28,7 +28,8 @@
|
||||||
// all byte values must be no larger than 0xF4
|
// all byte values must be no larger than 0xF4
|
||||||
|
|
||||||
TARGET_HASWELL
|
TARGET_HASWELL
|
||||||
namespace simdjson {
|
namespace simdjson::haswell {
|
||||||
|
|
||||||
static inline __m256i push_last_byte_of_a_to_b(__m256i a, __m256i b) {
|
static inline __m256i push_last_byte_of_a_to_b(__m256i a, __m256i b) {
|
||||||
return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 15);
|
return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 15);
|
||||||
}
|
}
|
||||||
|
@ -192,6 +193,14 @@ avx_check_utf8_bytes(__m256i current_bytes,
|
||||||
return pb;
|
return pb;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}; // namespace simdjson::haswell
|
||||||
|
UNTARGET_REGION // haswell
|
||||||
|
|
||||||
|
TARGET_HASWELL
|
||||||
|
namespace simdjson {
|
||||||
|
|
||||||
|
using namespace simdjson::haswell;
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct utf8_checker<Architecture::HASWELL> {
|
struct utf8_checker<Architecture::HASWELL> {
|
||||||
__m256i has_error;
|
__m256i has_error;
|
||||||
|
@ -230,7 +239,7 @@ struct utf8_checker<Architecture::HASWELL> {
|
||||||
}
|
}
|
||||||
}; // struct utf8_checker
|
}; // struct utf8_checker
|
||||||
|
|
||||||
} // namespace simdjson
|
}; // namespace simdjson
|
||||||
UNTARGET_REGION // haswell
|
UNTARGET_REGION // haswell
|
||||||
|
|
||||||
#endif // IS_X86_64
|
#endif // IS_X86_64
|
|
@ -0,0 +1,185 @@
|
||||||
|
#ifndef SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
|
||||||
|
#define SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
|
||||||
|
|
||||||
|
#include "simdjson/portability.h"
|
||||||
|
|
||||||
|
#ifdef IS_X86_64
|
||||||
|
|
||||||
|
#include "haswell/architecture.h"
|
||||||
|
#include "haswell/simd_input.h"
|
||||||
|
#include "haswell/simdutf8check.h"
|
||||||
|
#include "simdjson/stage1_find_marks.h"
|
||||||
|
|
||||||
|
TARGET_HASWELL
|
||||||
|
namespace simdjson::haswell {
|
||||||
|
|
||||||
|
static really_inline uint64_t compute_quote_mask(uint64_t quote_bits) {
|
||||||
|
// There should be no such thing as a processor supporting avx2
|
||||||
|
// but not clmul.
|
||||||
|
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||||
|
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0));
|
||||||
|
return quote_mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
|
||||||
|
uint64_t &whitespace, uint64_t &structurals) {
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_NAIVE_STRUCTURAL
|
||||||
|
// You should never need this naive approach, but it can be useful
|
||||||
|
// for research purposes
|
||||||
|
const __m256i mask_open_brace = _mm256_set1_epi8(0x7b);
|
||||||
|
__m256i struct_lo = _mm256_cmpeq_epi8(in.lo, mask_open_brace);
|
||||||
|
__m256i struct_hi = _mm256_cmpeq_epi8(in.hi, mask_open_brace);
|
||||||
|
const __m256i mask_close_brace = _mm256_set1_epi8(0x7d);
|
||||||
|
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_close_brace));
|
||||||
|
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_close_brace));
|
||||||
|
const __m256i mask_open_bracket = _mm256_set1_epi8(0x5b);
|
||||||
|
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_open_bracket));
|
||||||
|
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_open_bracket));
|
||||||
|
const __m256i mask_close_bracket = _mm256_set1_epi8(0x5d);
|
||||||
|
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_close_bracket));
|
||||||
|
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_close_bracket));
|
||||||
|
const __m256i mask_column = _mm256_set1_epi8(0x3a);
|
||||||
|
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_column));
|
||||||
|
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_column));
|
||||||
|
const __m256i mask_comma = _mm256_set1_epi8(0x2c);
|
||||||
|
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_comma));
|
||||||
|
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_comma));
|
||||||
|
uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(struct_lo));
|
||||||
|
uint64_t structural_res_1 = _mm256_movemask_epi8(struct_hi);
|
||||||
|
structurals = (structural_res_0 | (structural_res_1 << 32));
|
||||||
|
|
||||||
|
const __m256i mask_space = _mm256_set1_epi8(0x20);
|
||||||
|
__m256i space_lo = _mm256_cmpeq_epi8(in.lo, mask_space);
|
||||||
|
__m256i space_hi = _mm256_cmpeq_epi8(in.hi, mask_space);
|
||||||
|
const __m256i mask_linefeed = _mm256_set1_epi8(0x0a);
|
||||||
|
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_linefeed));
|
||||||
|
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_linefeed));
|
||||||
|
const __m256i mask_tab = _mm256_set1_epi8(0x09);
|
||||||
|
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_tab));
|
||||||
|
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_tab));
|
||||||
|
const __m256i mask_carriage = _mm256_set1_epi8(0x0d);
|
||||||
|
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_carriage));
|
||||||
|
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_carriage));
|
||||||
|
|
||||||
|
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(space_lo));
|
||||||
|
uint64_t ws_res_1 = _mm256_movemask_epi8(space_hi);
|
||||||
|
whitespace = (ws_res_0 | (ws_res_1 << 32));
|
||||||
|
// end of naive approach
|
||||||
|
|
||||||
|
#else // SIMDJSON_NAIVE_STRUCTURAL
|
||||||
|
// clang-format off
|
||||||
|
const __m256i structural_table =
|
||||||
|
_mm256_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123,
|
||||||
|
44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
|
||||||
|
const __m256i white_table = _mm256_setr_epi8(
|
||||||
|
32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100,
|
||||||
|
32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100);
|
||||||
|
// clang-format on
|
||||||
|
const __m256i struct_offset = _mm256_set1_epi8(0xd4u);
|
||||||
|
const __m256i struct_mask = _mm256_set1_epi8(32);
|
||||||
|
|
||||||
|
__m256i lo_white = _mm256_cmpeq_epi8(in.lo, _mm256_shuffle_epi8(white_table, in.lo));
|
||||||
|
__m256i hi_white = _mm256_cmpeq_epi8(in.hi, _mm256_shuffle_epi8(white_table, in.hi));
|
||||||
|
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(lo_white));
|
||||||
|
uint64_t ws_res_1 = _mm256_movemask_epi8(hi_white);
|
||||||
|
whitespace = (ws_res_0 | (ws_res_1 << 32));
|
||||||
|
__m256i lo_struct_r1 = _mm256_add_epi8(struct_offset, in.lo);
|
||||||
|
__m256i hi_struct_r1 = _mm256_add_epi8(struct_offset, in.hi);
|
||||||
|
__m256i lo_struct_r2 = _mm256_or_si256(in.lo, struct_mask);
|
||||||
|
__m256i hi_struct_r2 = _mm256_or_si256(in.hi, struct_mask);
|
||||||
|
__m256i lo_struct_r3 = _mm256_shuffle_epi8(structural_table, lo_struct_r1);
|
||||||
|
__m256i hi_struct_r3 = _mm256_shuffle_epi8(structural_table, hi_struct_r1);
|
||||||
|
__m256i lo_struct = _mm256_cmpeq_epi8(lo_struct_r2, lo_struct_r3);
|
||||||
|
__m256i hi_struct = _mm256_cmpeq_epi8(hi_struct_r2, hi_struct_r3);
|
||||||
|
|
||||||
|
uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(lo_struct));
|
||||||
|
uint64_t structural_res_1 = _mm256_movemask_epi8(hi_struct);
|
||||||
|
structurals = (structural_res_0 | (structural_res_1 << 32));
|
||||||
|
#endif // else SIMDJSON_NAIVE_STRUCTURAL
|
||||||
|
}
|
||||||
|
|
||||||
|
// flatten out values in 'bits' assuming that they are to have values of idx
|
||||||
|
// plus their position in the bitvector, and store these indexes at
|
||||||
|
// base_ptr[base] incrementing base as we go
|
||||||
|
// will potentially store extra values beyond end of valid bits, so base_ptr
|
||||||
|
// needs to be large enough to handle this
|
||||||
|
static really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
|
||||||
|
// In some instances, the next branch is expensive because it is mispredicted.
|
||||||
|
// Unfortunately, in other cases,
|
||||||
|
// it helps tremendously.
|
||||||
|
if (bits == 0)
|
||||||
|
return;
|
||||||
|
uint32_t cnt = _mm_popcnt_u64(bits);
|
||||||
|
uint32_t next_base = base + cnt;
|
||||||
|
idx -= 64;
|
||||||
|
base_ptr += base;
|
||||||
|
{
|
||||||
|
base_ptr[0] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr[1] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr[2] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr[3] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr[4] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr[5] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr[6] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr[7] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr += 8;
|
||||||
|
}
|
||||||
|
// We hope that the next branch is easily predicted.
|
||||||
|
if (cnt > 8) {
|
||||||
|
base_ptr[0] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr[1] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr[2] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr[3] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr[4] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr[5] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr[6] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr[7] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr += 8;
|
||||||
|
}
|
||||||
|
if (cnt > 16) { // unluckly: we rarely get here
|
||||||
|
// since it means having one structural or pseudo-structural element
|
||||||
|
// every 4 characters (possible with inputs like "","","",...).
|
||||||
|
do {
|
||||||
|
base_ptr[0] = idx + trailing_zeroes(bits);
|
||||||
|
bits = _blsr_u64(bits);
|
||||||
|
base_ptr++;
|
||||||
|
} while (bits != 0);
|
||||||
|
}
|
||||||
|
base = next_base;
|
||||||
|
}
|
||||||
|
|
||||||
|
#include "generic/stage1_find_marks.h"
|
||||||
|
|
||||||
|
} // namespace haswell
|
||||||
|
UNTARGET_REGION
|
||||||
|
|
||||||
|
TARGET_HASWELL
|
||||||
|
namespace simdjson {
|
||||||
|
|
||||||
|
template <>
|
||||||
|
int find_structural_bits<Architecture::HASWELL>(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
|
||||||
|
return haswell::find_structural_bits(buf, len, pj);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace simdjson
|
||||||
|
UNTARGET_REGION
|
||||||
|
|
||||||
|
#endif // IS_X86_64
|
||||||
|
#endif // SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
|
|
@ -0,0 +1,34 @@
|
||||||
|
#ifndef SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
|
||||||
|
#define SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
|
||||||
|
|
||||||
|
#include "simdjson/portability.h"
|
||||||
|
|
||||||
|
#ifdef IS_X86_64
|
||||||
|
|
||||||
|
#include "simdjson/stage2_build_tape.h"
|
||||||
|
#include "haswell/architecture.h"
|
||||||
|
#include "haswell/stringparsing.h"
|
||||||
|
|
||||||
|
TARGET_HASWELL
|
||||||
|
namespace simdjson::haswell {
|
||||||
|
|
||||||
|
#include "generic/stage2_build_tape.h"
|
||||||
|
|
||||||
|
} // namespace simdjson::haswell
|
||||||
|
UNTARGET_REGION
|
||||||
|
|
||||||
|
TARGET_HASWELL
|
||||||
|
namespace simdjson {
|
||||||
|
|
||||||
|
template <>
|
||||||
|
WARN_UNUSED int
|
||||||
|
unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||||
|
return haswell::unified_machine(buf, len, pj);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace simdjson
|
||||||
|
UNTARGET_REGION
|
||||||
|
|
||||||
|
#endif // IS_X86_64
|
||||||
|
|
||||||
|
#endif // SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
|
|
@ -1,15 +1,16 @@
|
||||||
#ifndef SIMDJSON_STRINGPARSING_HASWELL_H
|
#ifndef SIMDJSON_HASWELL_STRINGPARSING_H
|
||||||
#define SIMDJSON_STRINGPARSING_HASWELL_H
|
#define SIMDJSON_HASWELL_STRINGPARSING_H
|
||||||
|
|
||||||
#include "simdjson/stringparsing.h"
|
#include "../stringparsing.h"
|
||||||
|
|
||||||
#ifdef IS_X86_64
|
#ifdef IS_X86_64
|
||||||
|
|
||||||
|
#include "haswell/architecture.h"
|
||||||
|
|
||||||
TARGET_HASWELL
|
TARGET_HASWELL
|
||||||
namespace simdjson {
|
namespace simdjson::haswell {
|
||||||
template <>
|
|
||||||
really_inline parse_string_helper
|
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
|
||||||
find_bs_bits_and_quote_bits<Architecture::HASWELL>(const uint8_t *src,
|
|
||||||
uint8_t *dst) {
|
|
||||||
// this can read up to 31 bytes beyond the buffer size, but we require
|
// this can read up to 31 bytes beyond the buffer size, but we require
|
||||||
// SIMDJSON_PADDING of padding
|
// SIMDJSON_PADDING of padding
|
||||||
static_assert(sizeof(__m256i) - 1 <= SIMDJSON_PADDING);
|
static_assert(sizeof(__m256i) - 1 <= SIMDJSON_PADDING);
|
||||||
|
@ -24,14 +25,11 @@ find_bs_bits_and_quote_bits<Architecture::HASWELL>(const uint8_t *src,
|
||||||
static_cast<uint32_t>(_mm256_movemask_epi8(quote_mask)) // quote_bits
|
static_cast<uint32_t>(_mm256_movemask_epi8(quote_mask)) // quote_bits
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
} // namespace simdjson
|
|
||||||
UNTARGET_REGION
|
|
||||||
|
|
||||||
#define TARGETED_ARCHITECTURE Architecture::HASWELL
|
#include "generic/stringparsing.h"
|
||||||
#define TARGETED_REGION TARGET_HASWELL
|
|
||||||
#include "simdjson/stringparsing_common.h"
|
} // namespace simdjson::haswell
|
||||||
#undef TARGETED_ARCHITECTURE
|
UNTARGET_REGION
|
||||||
#undef TARGETED_REGION
|
|
||||||
|
|
||||||
#endif // IS_X86_64
|
#endif // IS_X86_64
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
#define SIMDJSON_SIMDUTF8CHECK_H
|
#define SIMDJSON_SIMDUTF8CHECK_H
|
||||||
|
|
||||||
#include "simdjson/simdjson.h"
|
#include "simdjson/simdjson.h"
|
||||||
#include "simdjson/simd_input.h"
|
#include "simd_input.h"
|
||||||
|
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
|
|
|
@ -1,34 +1,18 @@
|
||||||
#include "simdjson/stage1_find_marks.h"
|
|
||||||
#include "simdjson/portability.h"
|
#include "simdjson/portability.h"
|
||||||
|
|
||||||
#ifdef IS_X86_64
|
namespace {
|
||||||
|
// for when clmul is unavailable
|
||||||
|
[[maybe_unused]] uint64_t portable_compute_quote_mask(uint64_t quote_bits) {
|
||||||
|
uint64_t quote_mask = quote_bits ^ (quote_bits << 1);
|
||||||
|
quote_mask = quote_mask ^ (quote_mask << 2);
|
||||||
|
quote_mask = quote_mask ^ (quote_mask << 4);
|
||||||
|
quote_mask = quote_mask ^ (quote_mask << 8);
|
||||||
|
quote_mask = quote_mask ^ (quote_mask << 16);
|
||||||
|
quote_mask = quote_mask ^ (quote_mask << 32);
|
||||||
|
return quote_mask;
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
#define TARGETED_ARCHITECTURE Architecture::HASWELL
|
#include "arm64/stage1_find_marks.h"
|
||||||
#define TARGETED_REGION TARGET_HASWELL
|
#include "haswell/stage1_find_marks.h"
|
||||||
#include "simdjson/stage1_find_marks_flatten_haswell.h"
|
#include "westmere/stage1_find_marks.h"
|
||||||
#include "simdjson/stage1_find_marks_haswell.h"
|
|
||||||
#include "simdjson/stage1_find_marks_common.h"
|
|
||||||
#undef TARGETED_ARCHITECTURE
|
|
||||||
#undef TARGETED_REGION
|
|
||||||
|
|
||||||
#define TARGETED_ARCHITECTURE Architecture::WESTMERE
|
|
||||||
#define TARGETED_REGION TARGET_WESTMERE
|
|
||||||
#include "simdjson/stage1_find_marks_flatten_common.h"
|
|
||||||
#include "simdjson/stage1_find_marks_westmere.h"
|
|
||||||
#include "simdjson/stage1_find_marks_common.h"
|
|
||||||
#undef TARGETED_ARCHITECTURE
|
|
||||||
#undef TARGETED_REGION
|
|
||||||
|
|
||||||
#endif // IS_X86_64
|
|
||||||
|
|
||||||
#ifdef IS_ARM64
|
|
||||||
|
|
||||||
#define TARGETED_ARCHITECTURE Architecture::ARM64
|
|
||||||
#define TARGETED_REGION TARGET_ARM64
|
|
||||||
#include "simdjson/stage1_find_marks_flatten_common.h"
|
|
||||||
#include "simdjson/stage1_find_marks_arm64.h"
|
|
||||||
#include "simdjson/stage1_find_marks_common.h"
|
|
||||||
#undef TARGETED_ARCHITECTURE
|
|
||||||
#undef TARGETED_REGION
|
|
||||||
|
|
||||||
#endif // IS_ARM64
|
|
||||||
|
|
|
@ -1,23 +1,3 @@
|
||||||
#include "simdjson/stage2_build_tape.h"
|
#include "arm64/stage2_build_tape.h"
|
||||||
|
#include "haswell/stage2_build_tape.h"
|
||||||
#ifdef IS_X86_64
|
#include "westmere/stage2_build_tape.h"
|
||||||
#define TARGETED_ARCHITECTURE Architecture::HASWELL
|
|
||||||
#define TARGETED_REGION TARGET_HASWELL
|
|
||||||
#include "simdjson/stage2_build_tape_common.h"
|
|
||||||
#undef TARGETED_ARCHITECTURE
|
|
||||||
#undef TARGETED_REGION
|
|
||||||
|
|
||||||
#define TARGETED_ARCHITECTURE Architecture::WESTMERE
|
|
||||||
#define TARGETED_REGION TARGET_WESTMERE
|
|
||||||
#include "simdjson/stage2_build_tape_common.h"
|
|
||||||
#undef TARGETED_ARCHITECTURE
|
|
||||||
#undef TARGETED_REGION
|
|
||||||
#endif // IS_X86_64
|
|
||||||
|
|
||||||
#ifdef IS_ARM64
|
|
||||||
#define TARGETED_ARCHITECTURE Architecture::ARM64
|
|
||||||
#define TARGETED_REGION TARGET_ARM64
|
|
||||||
#include "simdjson/stage2_build_tape_common.h"
|
|
||||||
#undef TARGETED_ARCHITECTURE
|
|
||||||
#undef TARGETED_REGION
|
|
||||||
#endif // IS_ARM64
|
|
||||||
|
|
|
@ -12,6 +12,7 @@ void found_bad_string(const uint8_t *buf);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
|
|
||||||
// begin copypasta
|
// begin copypasta
|
||||||
// These chars yield themselves: " \ /
|
// These chars yield themselves: " \ /
|
||||||
// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab
|
// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab
|
||||||
|
@ -84,22 +85,6 @@ struct parse_string_helper {
|
||||||
uint32_t quote_bits;
|
uint32_t quote_bits;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Finds where the backslashes and quotes are located.
|
|
||||||
template <Architecture>
|
|
||||||
parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src,
|
|
||||||
uint8_t *dst);
|
|
||||||
|
|
||||||
template <Architecture T>
|
|
||||||
WARN_UNUSED
|
|
||||||
really_inline bool
|
|
||||||
parse_string(UNUSED const uint8_t *buf, UNUSED size_t len, ParsedJson &pj,
|
|
||||||
UNUSED const uint32_t depth, UNUSED uint32_t offset);
|
|
||||||
|
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
/// Now include the specializations:
|
#endif // SIMDJSON_STRINGPARSING_H
|
||||||
#include "simdjson/stringparsing_arm64.h"
|
|
||||||
#include "simdjson/stringparsing_haswell.h"
|
|
||||||
#include "simdjson/stringparsing_westmere.h"
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -0,0 +1,19 @@
|
||||||
|
#ifndef SIMDJSON_WESTMERE_ARCHITECTURE_H
|
||||||
|
#define SIMDJSON_WESTMERE_ARCHITECTURE_H
|
||||||
|
|
||||||
|
#include "simdjson/portability.h"
|
||||||
|
|
||||||
|
#ifdef IS_X86_64
|
||||||
|
|
||||||
|
#include "simdjson/simdjson.h"
|
||||||
|
|
||||||
|
namespace simdjson::westmere {
|
||||||
|
|
||||||
|
static const Architecture ARCHITECTURE = Architecture::WESTMERE;
|
||||||
|
|
||||||
|
} // namespace simdjson::westmere
|
||||||
|
|
||||||
|
|
||||||
|
#endif // IS_X86_64
|
||||||
|
|
||||||
|
#endif // SIMDJSON_WESTMERE_ARCHITECTURE_H
|
|
@ -1,7 +1,7 @@
|
||||||
#ifndef SIMDJSON_SIMD_INPUT_WESTMERE_H
|
#ifndef SIMDJSON_WESTMERE_SIMD_INPUT_H
|
||||||
#define SIMDJSON_SIMD_INPUT_WESTMERE_H
|
#define SIMDJSON_WESTMERE_SIMD_INPUT_H
|
||||||
|
|
||||||
#include "simdjson/simd_input.h"
|
#include "../simd_input.h"
|
||||||
|
|
||||||
#ifdef IS_X86_64
|
#ifdef IS_X86_64
|
||||||
|
|
||||||
|
@ -54,4 +54,4 @@ struct simd_input<Architecture::WESTMERE> {
|
||||||
UNTARGET_REGION
|
UNTARGET_REGION
|
||||||
|
|
||||||
#endif // IS_X86_64
|
#endif // IS_X86_64
|
||||||
#endif // SIMDJSON_SIMD_INPUT_WESTMERE_H
|
#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H
|
|
@ -1,8 +1,8 @@
|
||||||
#ifndef SIMDJSON_SIMDUTF8CHECK_WESTMERE_H
|
#ifndef SIMDJSON_WESTMERE_SIMDUTF8CHECK_H
|
||||||
#define SIMDJSON_SIMDUTF8CHECK_WESTMERE_H
|
#define SIMDJSON_WESTMERE_SIMDUTF8CHECK_H
|
||||||
|
|
||||||
#include "simdjson/portability.h"
|
#include "simdjson/portability.h"
|
||||||
#include "simdjson/simdutf8check.h"
|
#include "simdutf8check.h"
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
@ -29,8 +29,7 @@
|
||||||
|
|
||||||
/********** sse code **********/
|
/********** sse code **********/
|
||||||
TARGET_WESTMERE
|
TARGET_WESTMERE
|
||||||
|
namespace simdjson::westmere {
|
||||||
namespace simdjson {
|
|
||||||
|
|
||||||
// all byte values must be no larger than 0xF4
|
// all byte values must be no larger than 0xF4
|
||||||
static inline void check_smaller_than_0xF4(__m128i current_bytes,
|
static inline void check_smaller_than_0xF4(__m128i current_bytes,
|
||||||
|
@ -164,6 +163,14 @@ check_utf8_bytes(__m128i current_bytes, struct processed_utf_bytes *previous,
|
||||||
return pb;
|
return pb;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace simdjson::westmere
|
||||||
|
UNTARGET_REGION // westmere
|
||||||
|
|
||||||
|
TARGET_WESTMERE
|
||||||
|
namespace simdjson {
|
||||||
|
|
||||||
|
using namespace simdjson::westmere;
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct utf8_checker<Architecture::WESTMERE> {
|
struct utf8_checker<Architecture::WESTMERE> {
|
||||||
__m128i has_error = _mm_setzero_si128();
|
__m128i has_error = _mm_setzero_si128();
|
|
@ -1,30 +1,30 @@
|
||||||
#ifndef SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H
|
#ifndef SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
|
||||||
#define SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H
|
#define SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
|
||||||
|
|
||||||
#include "simdjson/simd_input_westmere.h"
|
#include "simdjson/portability.h"
|
||||||
#include "simdjson/simdutf8check_westmere.h"
|
|
||||||
#include "simdjson/stage1_find_marks.h"
|
|
||||||
|
|
||||||
#ifdef IS_X86_64
|
#ifdef IS_X86_64
|
||||||
|
|
||||||
TARGET_WESTMERE
|
#include "westmere/architecture.h"
|
||||||
namespace simdjson {
|
#include "westmere/simd_input.h"
|
||||||
|
#include "westmere/simdutf8check.h"
|
||||||
|
#include "simdjson/stage1_find_marks.h"
|
||||||
|
|
||||||
template <>
|
TARGET_WESTMERE
|
||||||
really_inline uint64_t
|
namespace simdjson::westmere {
|
||||||
compute_quote_mask<Architecture::WESTMERE>(uint64_t quote_bits) {
|
|
||||||
|
static really_inline uint64_t compute_quote_mask(uint64_t quote_bits) {
|
||||||
return _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
return _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0));
|
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
static really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
|
||||||
really_inline void find_whitespace_and_structurals<Architecture::WESTMERE>(
|
uint64_t &whitespace, uint64_t &structurals) {
|
||||||
simd_input<Architecture::WESTMERE> in, uint64_t &whitespace,
|
|
||||||
uint64_t &structurals) {
|
|
||||||
const __m128i structural_table =
|
const __m128i structural_table =
|
||||||
_mm_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
|
_mm_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
|
||||||
const __m128i white_table = _mm_setr_epi8(32, 100, 100, 100, 17, 100, 113, 2,
|
const __m128i white_table = _mm_setr_epi8(32, 100, 100, 100, 17, 100, 113, 2,
|
||||||
100, 9, 10, 112, 100, 13, 100, 100);
|
100, 9, 10, 112, 100, 13, 100, 100);
|
||||||
const __m128i struct_offset = _mm_set1_epi8(0xd4u);
|
const __m128i struct_offset = _mm_set1_epi8(0xd4u);
|
||||||
const __m128i struct_mask = _mm_set1_epi8(32);
|
const __m128i struct_mask = _mm_set1_epi8(32);
|
||||||
|
|
||||||
|
@ -66,11 +66,25 @@ really_inline void find_whitespace_and_structurals<Architecture::WESTMERE>(
|
||||||
uint64_t structural_res_3 = _mm_movemask_epi8(struct4);
|
uint64_t structural_res_3 = _mm_movemask_epi8(struct4);
|
||||||
|
|
||||||
structurals = (structural_res_0 | (structural_res_1 << 16) |
|
structurals = (structural_res_0 | (structural_res_1 << 16) |
|
||||||
(structural_res_2 << 32) | (structural_res_3 << 48));
|
(structural_res_2 << 32) | (structural_res_3 << 48));
|
||||||
|
}
|
||||||
|
|
||||||
|
#include "generic/stage1_find_marks_flatten.h"
|
||||||
|
#include "generic/stage1_find_marks.h"
|
||||||
|
|
||||||
|
} // namespace westmere
|
||||||
|
UNTARGET_REGION
|
||||||
|
|
||||||
|
TARGET_WESTMERE
|
||||||
|
namespace simdjson {
|
||||||
|
|
||||||
|
template <>
|
||||||
|
int find_structural_bits<Architecture::WESTMERE>(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
|
||||||
|
return westmere::find_structural_bits(buf, len, pj);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
UNTARGET_REGION
|
UNTARGET_REGION
|
||||||
|
|
||||||
#endif // IS_X86_64
|
#endif // IS_X86_64
|
||||||
#endif // SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H
|
#endif // SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
|
|
@ -0,0 +1,34 @@
|
||||||
|
#ifndef SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
|
||||||
|
#define SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
|
||||||
|
|
||||||
|
#include "simdjson/portability.h"
|
||||||
|
|
||||||
|
#ifdef IS_X86_64
|
||||||
|
|
||||||
|
#include "simdjson/stage2_build_tape.h"
|
||||||
|
#include "westmere/architecture.h"
|
||||||
|
#include "westmere/stringparsing.h"
|
||||||
|
|
||||||
|
TARGET_WESTMERE
|
||||||
|
namespace simdjson::westmere {
|
||||||
|
|
||||||
|
#include "generic/stage2_build_tape.h"
|
||||||
|
|
||||||
|
} // namespace simdjson::westmere
|
||||||
|
UNTARGET_REGION
|
||||||
|
|
||||||
|
TARGET_WESTMERE
|
||||||
|
namespace simdjson {
|
||||||
|
|
||||||
|
template <>
|
||||||
|
WARN_UNUSED int
|
||||||
|
unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||||
|
return westmere::unified_machine(buf, len, pj);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace simdjson
|
||||||
|
UNTARGET_REGION
|
||||||
|
|
||||||
|
#endif // IS_X86_64
|
||||||
|
|
||||||
|
#endif // SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
|
|
@ -1,15 +1,16 @@
|
||||||
#ifndef SIMDJSON_STRINGPARSING_WESTMERE_H
|
#ifndef SIMDJSON_WESTMERE_STRINGPARSING_H
|
||||||
#define SIMDJSON_STRINGPARSING_WESTMERE_H
|
#define SIMDJSON_WESTMERE_STRINGPARSING_H
|
||||||
|
|
||||||
#include "simdjson/stringparsing.h"
|
#include "../stringparsing.h"
|
||||||
|
|
||||||
#ifdef IS_X86_64
|
#ifdef IS_X86_64
|
||||||
|
|
||||||
|
#include "westmere/architecture.h"
|
||||||
|
|
||||||
TARGET_WESTMERE
|
TARGET_WESTMERE
|
||||||
namespace simdjson {
|
namespace simdjson::westmere {
|
||||||
template <>
|
|
||||||
really_inline parse_string_helper
|
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
|
||||||
find_bs_bits_and_quote_bits<Architecture::WESTMERE>(const uint8_t *src,
|
|
||||||
uint8_t *dst) {
|
|
||||||
// this can read up to 15 bytes beyond the buffer size, but we require
|
// this can read up to 15 bytes beyond the buffer size, but we require
|
||||||
// SIMDJSON_PADDING of padding
|
// SIMDJSON_PADDING of padding
|
||||||
__m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
|
__m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
|
||||||
|
@ -23,14 +24,11 @@ find_bs_bits_and_quote_bits<Architecture::WESTMERE>(const uint8_t *src,
|
||||||
static_cast<uint32_t>(_mm_movemask_epi8(quote_mask)) // quote_bits
|
static_cast<uint32_t>(_mm_movemask_epi8(quote_mask)) // quote_bits
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
} // namespace simdjson
|
|
||||||
UNTARGET_REGION
|
|
||||||
|
|
||||||
#define TARGETED_ARCHITECTURE Architecture::WESTMERE
|
#include "generic/stringparsing.h"
|
||||||
#define TARGETED_REGION TARGET_WESTMERE
|
|
||||||
#include "simdjson/stringparsing_common.h"
|
} // namespace simdjson::westmere
|
||||||
#undef TARGETED_ARCHITECTURE
|
UNTARGET_REGION
|
||||||
#undef TARGETED_REGION
|
|
||||||
|
|
||||||
#endif // IS_X86_64
|
#endif // IS_X86_64
|
||||||
|
|
Loading…
Reference in New Issue