Move architecture-specific headers to src/ (#287)
* Use namespaces instead of templates for stage1 impls * Move stage1 implementation into the src/ directory * Move architecture-specific code to src/
This commit is contained in:
parent
a1bff85263
commit
585f84a734
|
@ -7,10 +7,14 @@
|
|||
# Build outputs (TODO build to a subdir so we can exclude that instead)
|
||||
/allparserscheckfile
|
||||
/basictests
|
||||
/benchmark/parse
|
||||
/benchmark/perfdiff
|
||||
/benchmark/statisticalmodel
|
||||
/json2json
|
||||
/jsoncheck
|
||||
/jsonpointer
|
||||
/jsonstats
|
||||
/libsimdjson.so*
|
||||
/minify
|
||||
/numberparsingcheck
|
||||
/parse
|
||||
|
@ -25,8 +29,33 @@
|
|||
/simdjson.h
|
||||
/singleheader/amalgamation_demo
|
||||
/singleheader/demo
|
||||
/tests/basictests
|
||||
/tests/jsoncheck
|
||||
/tests/pointercheck
|
||||
/tools/json2json
|
||||
/tools/jsonstats
|
||||
/tools/minify
|
||||
# CMake ignore from https://github.com/github/gitignore/blob/master/CMake.gitignore
|
||||
|
||||
# Generic from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore
|
||||
CMakeLists.txt.user
|
||||
CMakeCache.txt
|
||||
CMakeFiles
|
||||
CMakeScripts
|
||||
Testing
|
||||
Makefile
|
||||
cmake_install.cmake
|
||||
install_manifest.txt
|
||||
compile_commands.json
|
||||
CTestTestfile.cmake
|
||||
_deps
|
||||
|
||||
# CMake files that may be specific to our installation
|
||||
/CPackConfig.cmake
|
||||
/CPackSourceConfig.cmake
|
||||
# We check in a custom version of root Makefile that is not generated by CMake
|
||||
!/Makefile
|
||||
|
||||
# C++ ignore from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore
|
||||
|
||||
# Prerequisites
|
||||
*.d
|
||||
|
|
8
Makefile
8
Makefile
|
@ -22,7 +22,7 @@ else
|
|||
ARCHFLAGS ?= -msse4.2 -mpclmul # lowest supported feature set?
|
||||
endif
|
||||
|
||||
CXXFLAGS = $(ARCHFLAGS) -std=c++17 -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux $(EXTRAFLAGS)
|
||||
CXXFLAGS = $(ARCHFLAGS) -std=c++17 -Wall -Wextra -Wshadow -Iinclude -Isrc -Ibenchmark/linux $(EXTRAFLAGS)
|
||||
CFLAGS = $(ARCHFLAGS) -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src $(EXTRAFLAGS)
|
||||
|
||||
|
||||
|
@ -63,7 +63,11 @@ TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck pointercheck
|
|||
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile allparsingcompetition
|
||||
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
|
||||
|
||||
HEADERS= include/simdjson/simdutf8check_haswell.h include/simdjson/simdutf8check_westmere.h include/simdjson/simdutf8check_arm64.h include/simdjson/stringparsing.h include/simdjson/stringparsing_arm64.h include/simdjson/stringparsing_haswell.h include/simdjson/stringparsing_westmere.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage1_find_marks_arm64.h include/simdjson/stage1_find_marks_haswell.h include/simdjson/stage1_find_marks_westmere.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h include/simdjson/stage1_find_marks_flatten_common.h include/simdjson/stage1_find_marks_flatten_haswell.h
|
||||
# Load headers and sources
|
||||
LIBHEADERS=src/simd_input.h src/simdutf8check.h src/stringparsing.h src/arm64/architecture.h src/arm64/simd_input.h src/arm64/simdutf8check.h src/arm64/stage1_find_marks.h src/arm64/stage2_build_tape.h src/arm64/stringparsing.h src/generic/stage1_find_marks_flatten.h src/generic/stage1_find_marks.h src/generic/stage2_build_tape.h src/generic/stringparsing.h src/haswell/architecture.h src/haswell/simd_input.h src/haswell/simdutf8check.h src/haswell/stage1_find_marks.h src/haswell/stage2_build_tape.h src/haswell/stringparsing.h src/westmere/architecture.h src/westmere/simd_input.h src/westmere/simdutf8check.h src/westmere/stage1_find_marks.h src/westmere/stage2_build_tape.h src/westmere/stringparsing.h
|
||||
PUBHEADERS=include/simdjson/common_defs.h include/simdjson/isadetection.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/numberparsing.h include/simdjson/padded_string.h include/simdjson/parsedjson.h include/simdjson/parsedjsoniterator.h include/simdjson/portability.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h include/simdjson/simdprune_tables.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h
|
||||
HEADERS=$(PUBHEADERS) $(LIBHEADERS)
|
||||
|
||||
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/parsedjson.cpp src/parsedjsoniterator.cpp
|
||||
MINIFIERHEADERS=include/simdjson/jsonminifier.h include/simdjson/simdprune_tables.h
|
||||
MINIFIERLIBFILES=src/jsonminifier.cpp
|
||||
|
|
|
@ -17,8 +17,28 @@ $SCRIPTPATH/src/simdjson.cpp
|
|||
$SCRIPTPATH/src/jsonioutil.cpp
|
||||
$SCRIPTPATH/src/jsonminifier.cpp
|
||||
$SCRIPTPATH/src/jsonparser.cpp
|
||||
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten_haswell.h
|
||||
$SCRIPTPATH/src/simd_input.h
|
||||
$SCRIPTPATH/src/arm64/architecture.h
|
||||
$SCRIPTPATH/src/haswell/architecture.h
|
||||
$SCRIPTPATH/src/westmere/architecture.h
|
||||
$SCRIPTPATH/src/arm64/simd_input.h
|
||||
$SCRIPTPATH/src/haswell/simd_input.h
|
||||
$SCRIPTPATH/src/westmere/simd_input.h
|
||||
$SCRIPTPATH/src/simdutf8check.h
|
||||
$SCRIPTPATH/src/arm64/simdutf8check.h
|
||||
$SCRIPTPATH/src/haswell/simdutf8check.h
|
||||
$SCRIPTPATH/src/westmere/simdutf8check.h
|
||||
$SCRIPTPATH/src/arm64/stage1_find_marks.h
|
||||
$SCRIPTPATH/src/haswell/stage1_find_marks.h
|
||||
$SCRIPTPATH/src/westmere/stage1_find_marks.h
|
||||
$SCRIPTPATH/src/stage1_find_marks.cpp
|
||||
$SCRIPTPATH/src/stringparsing.h
|
||||
$SCRIPTPATH/src/arm64/stringparsing.h
|
||||
$SCRIPTPATH/src/haswell/stringparsing.h
|
||||
$SCRIPTPATH/src/westmere/stringparsing.h
|
||||
$SCRIPTPATH/src/arm64/stage2_build_tape.h
|
||||
$SCRIPTPATH/src/haswell/stage2_build_tape.h
|
||||
$SCRIPTPATH/src/westmere/stage2_build_tape.h
|
||||
$SCRIPTPATH/src/stage2_build_tape.cpp
|
||||
$SCRIPTPATH/src/parsedjson.cpp
|
||||
$SCRIPTPATH/src/parsedjsoniterator.cpp
|
||||
|
@ -36,25 +56,10 @@ $SCRIPTPATH/include/simdjson/jsoncharutils.h
|
|||
$SCRIPTPATH/include/simdjson/jsonformatutils.h
|
||||
$SCRIPTPATH/include/simdjson/jsonioutil.h
|
||||
$SCRIPTPATH/include/simdjson/simdprune_tables.h
|
||||
$SCRIPTPATH/include/simdjson/simd_input.h
|
||||
$SCRIPTPATH/include/simdjson/simd_input_haswell.h
|
||||
$SCRIPTPATH/include/simdjson/simd_input_westmere.h
|
||||
$SCRIPTPATH/include/simdjson/simd_input_arm64.h
|
||||
$SCRIPTPATH/include/simdjson/simdutf8check.h
|
||||
$SCRIPTPATH/include/simdjson/simdutf8check_haswell.h
|
||||
$SCRIPTPATH/include/simdjson/simdutf8check_westmere.h
|
||||
$SCRIPTPATH/include/simdjson/simdutf8check_arm64.h
|
||||
$SCRIPTPATH/include/simdjson/jsonminifier.h
|
||||
$SCRIPTPATH/include/simdjson/parsedjson.h
|
||||
$SCRIPTPATH/include/simdjson/parsedjsoniterator.h
|
||||
$SCRIPTPATH/include/simdjson/stage1_find_marks.h
|
||||
$SCRIPTPATH/include/simdjson/stage1_find_marks_westmere.h
|
||||
$SCRIPTPATH/include/simdjson/stage1_find_marks_haswell.h
|
||||
$SCRIPTPATH/include/simdjson/stage1_find_marks_arm64.h
|
||||
$SCRIPTPATH/include/simdjson/stringparsing.h
|
||||
$SCRIPTPATH/include/simdjson/stringparsing_westmere.h
|
||||
$SCRIPTPATH/include/simdjson/stringparsing_haswell.h
|
||||
$SCRIPTPATH/include/simdjson/stringparsing_arm64.h
|
||||
$SCRIPTPATH/include/simdjson/numberparsing.h
|
||||
$SCRIPTPATH/include/simdjson/stage2_build_tape.h
|
||||
$SCRIPTPATH/include/simdjson/jsonparser.h
|
||||
|
@ -74,17 +79,27 @@ function dofile()
|
|||
# echo "#line 8 \"$1\"" ## redefining the line/file is not nearly as useful as it sounds for debugging. It breaks IDEs.
|
||||
while IFS= read -r line
|
||||
do
|
||||
if [[ "${line}" == '#include "simdjson'* ]]; then
|
||||
# we paste the contents of simdjson header files whose names end in _common.h
|
||||
# we ignore all other simdjson headers
|
||||
if [[ "${line}" == '#include "simdjson/'*'_common.h"'* ]]; then
|
||||
file=$(echo $line| cut -d'"' -f 2)
|
||||
echo "$(<include/$file)" # we assume those files are always in include/
|
||||
fi
|
||||
else
|
||||
# Otherwise we simply copy the line
|
||||
echo "$line"
|
||||
if [[ "${line}" == '#include "'*'"'* ]]; then
|
||||
file=$(echo $line| cut -d'"' -f 2)
|
||||
|
||||
if [[ "${file}" == '../'* ]]; then
|
||||
file=$(echo $file| cut -d'/' -f 2-)
|
||||
fi;
|
||||
|
||||
# we ignore simdjson headers (except src/generic/*.h); they are handled in the above list
|
||||
if [ -f include/$file ]; then
|
||||
continue;
|
||||
elif [ -f src/$file ]; then
|
||||
# we paste the contents of src/generic/*.h
|
||||
if [[ "${file}" == *'generic/'*'.h' ]]; then
|
||||
echo "$(<src/$file)"
|
||||
fi;
|
||||
continue;
|
||||
fi;
|
||||
fi;
|
||||
|
||||
# Otherwise we simply copy the line
|
||||
echo "$line"
|
||||
done < "$1"
|
||||
echo "/* end file $RELFILE */"
|
||||
}
|
||||
|
|
|
@ -88,8 +88,7 @@ int unified_machine_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
}
|
||||
|
||||
// Responsible for selecting the best json_parse implementation
|
||||
int find_structural_bits_dispatch(const uint8_t *buf, size_t len,
|
||||
ParsedJson &pj) {
|
||||
int find_structural_bits_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
Architecture best_implementation = _find_best_supported_implementation();
|
||||
// Selecting the best implementation
|
||||
switch (best_implementation) {
|
||||
|
|
|
@ -1,35 +1,20 @@
|
|||
set(SIMDJSON_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include/simdjson)
|
||||
set(SIMDJSON_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
|
||||
set(SIMDJSON_INCLUDE
|
||||
${SIMDJSON_INCLUDE_DIR}/common_defs.h
|
||||
${SIMDJSON_INCLUDE_DIR}/isadetection.h
|
||||
${SIMDJSON_INCLUDE_DIR}/jsoncharutils.h
|
||||
${SIMDJSON_INCLUDE_DIR}/jsonformatutils.h
|
||||
${SIMDJSON_INCLUDE_DIR}/jsonioutil.h
|
||||
${SIMDJSON_INCLUDE_DIR}/jsonminifier.h
|
||||
${SIMDJSON_INCLUDE_DIR}/jsonparser.h
|
||||
${SIMDJSON_INCLUDE_DIR}/numberparsing.h
|
||||
${SIMDJSON_INCLUDE_DIR}/padded_string.h
|
||||
${SIMDJSON_INCLUDE_DIR}/parsedjson.h
|
||||
${SIMDJSON_INCLUDE_DIR}/parsedjsoniterator.h
|
||||
${SIMDJSON_INCLUDE_DIR}/portability.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson_version.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdprune_tables.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdutf8check_arm64.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdutf8check_haswell.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdutf8check_westmere.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_arm64.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_common.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten_common.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten_haswell.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_haswell.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_westmere.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage2_build_tape.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage2_build_tape_common.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stringparsing.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stringparsing_arm64.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stringparsing_common.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stringparsing_haswell.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stringparsing_westmere.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/common_defs.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/isadetection.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/jsoncharutils.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonformatutils.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonioutil.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonminifier.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonparser.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/numberparsing.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/padded_string.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/parsedjson.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/parsedjsoniterator.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/portability.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/simdjson.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/simdjson_version.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/simdprune_tables.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/stage1_find_marks.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/stage2_build_tape.h
|
||||
)
|
|
@ -1,108 +1,19 @@
|
|||
#ifndef SIMDJSON_STAGE1_FIND_MARKS_H
|
||||
#define SIMDJSON_STAGE1_FIND_MARKS_H
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/portability.h"
|
||||
#include "simdjson/simdjson.h"
|
||||
#include "simdjson/simd_input.h"
|
||||
#include <cassert>
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
template <Architecture> uint64_t compute_quote_mask(uint64_t quote_bits);
|
||||
|
||||
namespace {
|
||||
// Portable fallback for when clmul (carry-less multiply) is unavailable.
// Computes the running (prefix) XOR of quote_bits: bit i of the result is the
// XOR of bits 0..i of the input. Each step XORs in a copy shifted by twice the
// previous distance (1, 2, 4, 8, 16, 32), so six steps cover all 64 bits.
// NOTE(review): presumably the input has one bit per quote character, so the
// result marks the bytes lying inside quotes -- confirm against the callers.
|
||||
[[maybe_unused]] uint64_t portable_compute_quote_mask(uint64_t quote_bits) {
|
||||
uint64_t quote_mask = quote_bits ^ (quote_bits << 1);
|
||||
quote_mask = quote_mask ^ (quote_mask << 2);
|
||||
quote_mask = quote_mask ^ (quote_mask << 4);
|
||||
quote_mask = quote_mask ^ (quote_mask << 8);
|
||||
quote_mask = quote_mask ^ (quote_mask << 16);
|
||||
quote_mask = quote_mask ^ (quote_mask << 32);
|
||||
return quote_mask;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <Architecture T>
|
||||
really_inline uint64_t find_odd_backslash_sequences(
|
||||
simd_input<T> in, uint64_t &prev_iter_ends_odd_backslash);
|
||||
|
||||
// Declaration only: the definition is supplied per architecture, selected by
// the template parameter T.
// quote_bits, error_mask and prev_iter_inside_quote are taken by non-const
// reference, so the definition may write to them.
// NOTE(review): the exact in/out contract of each reference is set by the
// per-architecture definitions, which are not visible here -- confirm there.
template <Architecture T>
|
||||
really_inline uint64_t find_quote_mask_and_bits(
|
||||
simd_input<T> in, uint64_t odd_ends, uint64_t &prev_iter_inside_quote,
|
||||
uint64_t &quote_bits, uint64_t &error_mask);
|
||||
|
||||
// do a 'shufti' to detect structural JSON characters
|
||||
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
|
||||
// these go into the first 3 buckets of the comparison (1/2/4)
|
||||
|
||||
// we are also interested in the four whitespace characters
|
||||
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
|
||||
// these go into the next 2 buckets of the comparison (8/16)
|
||||
template <Architecture T>
|
||||
void find_whitespace_and_structurals(simd_input<T> in, uint64_t &whitespace,
|
||||
uint64_t &structurals);
|
||||
|
||||
// Returns an updated structural bit vector with quoted contents cleared out and
|
||||
// pseudo-structural characters added to the mask.
|
||||
// Updates prev_iter_ends_pseudo_pred, which tells us whether the previous
|
||||
// iteration ended on a whitespace or a structural character (which means that
|
||||
// the next iteration
|
||||
// will have a pseudo-structural character at its start).
// All bitmask arguments are 64 bits wide; the code treats bit 0 as the first
// position and bit 63 as the last (see the << 1 / >> 63 carry below).
|
||||
really_inline uint64_t finalize_structurals(
|
||||
uint64_t structurals, uint64_t whitespace, uint64_t quote_mask,
|
||||
uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) {
|
||||
// mask off anything inside quotes
|
||||
structurals &= ~quote_mask;
|
||||
// add the real quote bits back into our bit_mask as well, so we can
|
||||
// quickly traverse the strings we've spent all this trouble gathering
|
||||
structurals |= quote_bits;
|
||||
// Now, establish "pseudo-structural characters". These are non-whitespace
|
||||
// characters that are (a) outside quotes and (b) have a predecessor that's
|
||||
// either whitespace or a structural character. This means that subsequent
|
||||
// passes will get a chance to encounter the first character of every string
|
||||
// of non-whitespace and, if we're parsing an atom like true/false/null or a
|
||||
// number we can stop at the first whitespace or structural character
|
||||
// following it.
|
||||
|
||||
// a qualified predecessor is something that can happen 1 position before a
|
||||
// pseudo-structural character
|
||||
uint64_t pseudo_pred = structurals | whitespace;
|
||||
|
||||
// shift by one position so each bit describes its predecessor; bit 0 takes
// the carry saved from the previous 64-bit block
uint64_t shifted_pseudo_pred =
|
||||
(pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
|
||||
// save this block's last position (bit 63) as the carry for the next call
prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
|
||||
uint64_t pseudo_structurals =
|
||||
shifted_pseudo_pred & (~whitespace) & (~quote_mask);
|
||||
structurals |= pseudo_structurals;
|
||||
|
||||
// now, we've used our close quotes all we need to. So let's switch them off
|
||||
// they will be off in the quote mask and on in quote bits.
|
||||
structurals &= ~(quote_bits & ~quote_mask);
|
||||
return structurals;
|
||||
}
|
||||
template <Architecture T = Architecture::NATIVE>
|
||||
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj);
|
||||
|
||||
template <Architecture T = Architecture::NATIVE>
|
||||
int find_structural_bits(const uint8_t *buf, size_t len,
|
||||
simdjson::ParsedJson &pj);
|
||||
|
||||
template <Architecture T = Architecture::NATIVE>
|
||||
int find_structural_bits(const char *buf, size_t len,
|
||||
simdjson::ParsedJson &pj) {
|
||||
int find_structural_bits(const char *buf, size_t len, simdjson::ParsedJson &pj) {
|
||||
return find_structural_bits((const uint8_t *)buf, len, pj);
|
||||
}
|
||||
|
||||
// flatten out values in 'bits' assuming that they are to have values of idx
|
||||
// plus their position in the bitvector, and store these indexes at
|
||||
// base_ptr[base] incrementing base as we go
|
||||
// will potentially store extra values beyond end of valid bits, so base_ptr
|
||||
// needs to be large enough to handle this
|
||||
template <Architecture T = Architecture::NATIVE>
|
||||
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
|
||||
uint32_t idx, uint64_t bits);
|
||||
|
||||
} // namespace simdjson
|
||||
}; // namespace simdjson
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,90 +0,0 @@
|
|||
// This file provides the same function as
|
||||
// stage1_find_marks_flatten_common.h, but uses Intel intrinsics.
|
||||
// This should provide better performance on Visual Studio
|
||||
// and other compilers that do a conservative optimization.
|
||||
|
||||
// Specifically, on x64 processors with BMI,
|
||||
// x & (x - 1) should be mapped to
|
||||
// the blsr instruction. By using the
|
||||
// _blsr_u64 intrinsic, we
|
||||
// ensure that this will happen.
|
||||
/////////
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
TARGET_HASWELL
|
||||
namespace simdjson {
|
||||
|
||||
// flatten out values in 'bits' assuming that they are to have values of idx
|
||||
// plus their position in the bitvector, and store these indexes at
|
||||
// base_ptr[base] incrementing base as we go
|
||||
// will potentially store extra values beyond end of valid bits, so base_ptr
|
||||
// needs to be large enough to handle this
// Haswell specialization: uses the _mm_popcnt_u64 and _blsr_u64 intrinsics and
// is unrolled by hand in groups of eight stores.
|
||||
template<>
|
||||
really_inline void flatten_bits<Architecture::HASWELL>(uint32_t *base_ptr, uint32_t &base,
|
||||
uint32_t idx, uint64_t bits) {
|
||||
// In some instances, the next branch is expensive because it is mispredicted.
|
||||
// Unfortunately, in other cases,
|
||||
// it helps tremendously.
|
||||
if (bits == 0)
|
||||
return;
|
||||
// number of set bits == number of valid indexes this call produces
uint32_t cnt = _mm_popcnt_u64(bits);
|
||||
uint32_t next_base = base + cnt;
|
||||
// NOTE(review): presumably the caller passes the index just past the 64-byte
// block that 'bits' describes, hence the rewind by 64 -- confirm at call site.
idx -= 64;
|
||||
base_ptr += base;
|
||||
// First eight slots are written unconditionally. If fewer than eight bits are
// set, the surplus slots hold junk, but 'base' only advances by cnt, so later
// writes overwrite them.
{
|
||||
base_ptr[0] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[1] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[2] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[3] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[4] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[5] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[6] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[7] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr += 8;
|
||||
}
|
||||
// We hope that the next branch is easily predicted.
|
||||
if (cnt > 8) {
|
||||
base_ptr[0] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[1] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[2] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[3] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[4] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[5] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[6] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[7] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr += 8;
|
||||
}
|
||||
if (cnt > 16) { // unlucky: we rarely get here
|
||||
// since it means having one structural or pseudo-structural element
|
||||
// every 4 characters (possible with inputs like "","","",...).
|
||||
// cnt > 16 guarantees at least one bit is still set on entry, so the
// do/while never runs on an empty 'bits'.
do {
|
||||
base_ptr[0] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr++;
|
||||
} while (bits != 0);
|
||||
}
|
||||
// publish only the valid count, regardless of how many slots were written
base = next_base;
|
||||
}
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
#endif // IS_X86_64
|
|
@ -1,116 +0,0 @@
|
|||
#ifndef SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H
|
||||
#define SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H
|
||||
|
||||
#include "simdjson/simd_input_haswell.h"
|
||||
#include "simdjson/simdutf8check_haswell.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
TARGET_HASWELL
|
||||
namespace simdjson {
|
||||
|
||||
// Haswell specialization of compute_quote_mask: carry-less multiplies
// quote_bits by an all-ones operand and returns the low 64 bits of the product.
template <>
|
||||
really_inline uint64_t
|
||||
compute_quote_mask<Architecture::HASWELL>(uint64_t quote_bits) {
|
||||
// There should be no such thing as a processor supporting avx2
|
||||
// but not clmul.
|
||||
// the immediate 0 selects the low 64-bit half of both operands
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0));
|
||||
return quote_mask;
|
||||
}
|
||||
|
||||
// Haswell specialization: classifies the 64 input bytes held in in.lo / in.hi
// (two 256-bit registers) and writes two 64-bit masks, one bit per byte:
//   whitespace  - bytes equal to 0x20, 0x0a, 0x09 or 0x0d
//   structurals - bytes equal to '{' '}' '[' ']' ':' ','
// Bits 0..31 come from in.lo and bits 32..63 from in.hi.
// Two implementations are selected at compile time: a naive chain of byte
// compares (SIMDJSON_NAIVE_STRUCTURAL) and the default table-lookup version.
template <>
|
||||
really_inline void find_whitespace_and_structurals<Architecture::HASWELL>(
|
||||
simd_input<Architecture::HASWELL> in, uint64_t &whitespace,
|
||||
uint64_t &structurals) {
|
||||
#ifdef SIMDJSON_NAIVE_STRUCTURAL
|
||||
// You should never need this naive approach, but it can be useful
|
||||
// for research purposes
|
||||
// one compare per structural character, OR-ed together for each half
const __m256i mask_open_brace = _mm256_set1_epi8(0x7b);
|
||||
__m256i struct_lo = _mm256_cmpeq_epi8(in.lo, mask_open_brace);
|
||||
__m256i struct_hi = _mm256_cmpeq_epi8(in.hi, mask_open_brace);
|
||||
const __m256i mask_close_brace = _mm256_set1_epi8(0x7d);
|
||||
struct_lo =
|
||||
_mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_close_brace));
|
||||
struct_hi =
|
||||
_mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_close_brace));
|
||||
const __m256i mask_open_bracket = _mm256_set1_epi8(0x5b);
|
||||
struct_lo =
|
||||
_mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_open_bracket));
|
||||
struct_hi =
|
||||
_mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_open_bracket));
|
||||
const __m256i mask_close_bracket = _mm256_set1_epi8(0x5d);
|
||||
struct_lo =
|
||||
_mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_close_bracket));
|
||||
struct_hi =
|
||||
_mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_close_bracket));
|
||||
// 0x3a is ':' (colon), despite the variable name
const __m256i mask_column = _mm256_set1_epi8(0x3a);
|
||||
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_column));
|
||||
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_column));
|
||||
const __m256i mask_comma = _mm256_set1_epi8(0x2c);
|
||||
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_comma));
|
||||
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_comma));
|
||||
// the low half is cast to uint32_t so the int movemask result is not
// sign-extended into bits 32..63
uint64_t structural_res_0 =
|
||||
static_cast<uint32_t>(_mm256_movemask_epi8(struct_lo));
|
||||
// no cast needed for the high half: any sign-extension bits are shifted out
// by the << 32 below
uint64_t structural_res_1 = _mm256_movemask_epi8(struct_hi);
|
||||
structurals = (structural_res_0 | (structural_res_1 << 32));
|
||||
|
||||
// same scheme for the four whitespace bytes
const __m256i mask_space = _mm256_set1_epi8(0x20);
|
||||
__m256i space_lo = _mm256_cmpeq_epi8(in.lo, mask_space);
|
||||
__m256i space_hi = _mm256_cmpeq_epi8(in.hi, mask_space);
|
||||
const __m256i mask_linefeed = _mm256_set1_epi8(0x0a);
|
||||
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_linefeed));
|
||||
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_linefeed));
|
||||
const __m256i mask_tab = _mm256_set1_epi8(0x09);
|
||||
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_tab));
|
||||
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_tab));
|
||||
const __m256i mask_carriage = _mm256_set1_epi8(0x0d);
|
||||
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_carriage));
|
||||
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_carriage));
|
||||
|
||||
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(space_lo));
|
||||
uint64_t ws_res_1 = _mm256_movemask_epi8(space_hi);
|
||||
whitespace = (ws_res_0 | (ws_res_1 << 32));
|
||||
// end of naive approach
|
||||
|
||||
#else // SIMDJSON_NAIVE_STRUCTURAL
|
||||
// clang-format off
|
||||
// Both tables are indexed through _mm256_shuffle_epi8 and repeat the same 16
// entries for each 128-bit lane.
// structural_table holds 44 ',', 125 '}', 58 ':' and 123 '{'.
const __m256i structural_table =
|
||||
_mm256_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123,
|
||||
44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
|
||||
// white_table holds 32 (space), 9 (tab), 10 (linefeed) and 13 (carriage
// return), each at the index equal to its own low nibble. The other entries
// are fillers whose low nibble differs from their index, so they never match.
const __m256i white_table = _mm256_setr_epi8(
|
||||
32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100,
|
||||
32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100);
|
||||
// clang-format on
|
||||
const __m256i struct_offset = _mm256_set1_epi8(0xd4u);
|
||||
const __m256i struct_mask = _mm256_set1_epi8(32);
|
||||
|
||||
// a byte is whitespace iff the table entry it selects equals the byte itself
__m256i lo_white =
|
||||
_mm256_cmpeq_epi8(in.lo, _mm256_shuffle_epi8(white_table, in.lo));
|
||||
__m256i hi_white =
|
||||
_mm256_cmpeq_epi8(in.hi, _mm256_shuffle_epi8(white_table, in.hi));
|
||||
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(lo_white));
|
||||
uint64_t ws_res_1 = _mm256_movemask_epi8(hi_white);
|
||||
whitespace = (ws_res_0 | (ws_res_1 << 32));
|
||||
// r1: byte + 0xd4 (wrapping) is the table index for each structural character
__m256i lo_struct_r1 = _mm256_add_epi8(struct_offset, in.lo);
|
||||
__m256i hi_struct_r1 = _mm256_add_epi8(struct_offset, in.hi);
|
||||
// r2: OR with 0x20 maps '[' (0x5b) and ']' (0x5d) onto '{' (0x7b) and '}'
// (0x7d), so the brace entries in the table serve the brackets too
__m256i lo_struct_r2 = _mm256_or_si256(in.lo, struct_mask);
|
||||
__m256i hi_struct_r2 = _mm256_or_si256(in.hi, struct_mask);
|
||||
// r3: table entry selected by r1; the byte is structural iff r2 == r3
__m256i lo_struct_r3 = _mm256_shuffle_epi8(structural_table, lo_struct_r1);
|
||||
__m256i hi_struct_r3 = _mm256_shuffle_epi8(structural_table, hi_struct_r1);
|
||||
__m256i lo_struct = _mm256_cmpeq_epi8(lo_struct_r2, lo_struct_r3);
|
||||
__m256i hi_struct = _mm256_cmpeq_epi8(hi_struct_r2, hi_struct_r3);
|
||||
|
||||
uint64_t structural_res_0 =
|
||||
static_cast<uint32_t>(_mm256_movemask_epi8(lo_struct));
|
||||
uint64_t structural_res_1 = _mm256_movemask_epi8(hi_struct);
|
||||
structurals = (structural_res_0 | (structural_res_1 << 32));
|
||||
#endif // SIMDJSON_NAIVE_STRUCTURAL
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
|
||||
#endif // IS_X86_64
|
||||
#endif // SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H
|
|
@ -10,7 +10,6 @@
|
|||
#include "simdjson/numberparsing.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/simdjson.h"
|
||||
#include "simdjson/stringparsing.h"
|
||||
|
||||
namespace simdjson {
|
||||
void init_state_machine();
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* auto-generated on Wed Aug 14 13:56:54 DST 2019. Do not edit! */
|
||||
/* auto-generated on Sun Aug 18 15:06:50 DST 2019. Do not edit! */
|
||||
|
||||
#include <iostream>
|
||||
#include "simdjson.h"
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -18,23 +18,55 @@ MESSAGE( STATUS "SIMDJSON_LIB_TYPE: " ${SIMDJSON_LIB_TYPE})
|
|||
|
||||
# Bring in include files
|
||||
include(../include/CMakeLists.txt)
|
||||
set(SIMDJSON_SRC
|
||||
jsonioutil.cpp
|
||||
jsonminifier.cpp
|
||||
jsonparser.cpp
|
||||
stage1_find_marks.cpp
|
||||
stage2_build_tape.cpp
|
||||
parsedjson.cpp
|
||||
parsedjsoniterator.cpp
|
||||
simdjson.cpp
|
||||
${SIMDJSON_INCLUDE}
|
||||
)
|
||||
|
||||
add_library(${SIMDJSON_LIB_NAME} ${SIMDJSON_LIB_TYPE} ${SIMDJSON_SRC})
|
||||
set(SIMDJSON_SRC_DIR $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src>)
|
||||
|
||||
set(SIMDJSON_SRC
|
||||
jsonioutil.cpp
|
||||
jsonminifier.cpp
|
||||
jsonparser.cpp
|
||||
stage1_find_marks.cpp
|
||||
stage2_build_tape.cpp
|
||||
parsedjson.cpp
|
||||
parsedjsoniterator.cpp
|
||||
simdjson.cpp
|
||||
)
|
||||
|
||||
# Load headers and sources
|
||||
set(SIMDJSON_SRC_HEADERS
|
||||
arm64/architecture.h
|
||||
arm64/simd_input.h
|
||||
arm64/simdutf8check.h
|
||||
arm64/stage1_find_marks.h
|
||||
arm64/stage2_build_tape.h
|
||||
arm64/stringparsing.h
|
||||
generic/stage1_find_marks_flatten.h
|
||||
generic/stage1_find_marks.h
|
||||
generic/stage2_build_tape.h
|
||||
generic/stringparsing.h
|
||||
haswell/architecture.h
|
||||
haswell/simd_input.h
|
||||
haswell/simdutf8check.h
|
||||
haswell/stage1_find_marks.h
|
||||
haswell/stage2_build_tape.h
|
||||
haswell/stringparsing.h
|
||||
westmere/architecture.h
|
||||
westmere/simd_input.h
|
||||
westmere/simdutf8check.h
|
||||
westmere/stage1_find_marks.h
|
||||
westmere/stage2_build_tape.h
|
||||
westmere/stringparsing.h
|
||||
simd_input.h
|
||||
simdutf8check.h
|
||||
stringparsing.h
|
||||
)
|
||||
|
||||
add_library(${SIMDJSON_LIB_NAME} ${SIMDJSON_LIB_TYPE} ${SIMDJSON_SRC} ${SIMDJSON_INCLUDE} ${SIMDJSON_SRC_HEADERS})
|
||||
|
||||
target_include_directories(${SIMDJSON_LIB_NAME}
|
||||
PUBLIC
|
||||
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
|
||||
$<BUILD_INTERFACE:${SIMDJSON_SRC_DIR}>
|
||||
$<BUILD_INTERFACE:${SIMDJSON_INCLUDE_DIR}>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
|
||||
)
|
||||
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
#ifndef SIMDJSON_ARM64_ARCHITECTURE_H
|
||||
#define SIMDJSON_ARM64_ARCHITECTURE_H
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#ifdef IS_ARM64
|
||||
|
||||
#include "simdjson/simdjson.h"
|
||||
|
||||
// Architecture-specific namespace for the ARM64 implementation.
namespace simdjson::arm64 {
|
||||
|
||||
// Architecture tag for this namespace, so code placed in simdjson::arm64 can
// refer to ARCHITECTURE instead of spelling out Architecture::ARM64.
static const Architecture ARCHITECTURE = Architecture::ARM64;
|
||||
|
||||
} // namespace simdjson::arm64
|
||||
|
||||
#endif // IS_ARM64
|
||||
|
||||
#endif // SIMDJSON_ARM64_ARCHITECTURE_H
|
|
@ -1,9 +1,10 @@
|
|||
#ifndef SIMDJSON_SIMD_INPUT_ARM64_H
|
||||
#define SIMDJSON_SIMD_INPUT_ARM64_H
|
||||
#ifndef SIMDJSON_ARM64_SIMD_INPUT_H
|
||||
#define SIMDJSON_ARM64_SIMD_INPUT_H
|
||||
|
||||
#include "simdjson/simd_input.h"
|
||||
#include "../simd_input.h"
|
||||
|
||||
#ifdef IS_ARM64
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
really_inline uint16_t neon_movemask(uint8x16_t input) {
|
||||
|
@ -68,4 +69,4 @@ struct simd_input<Architecture::ARM64> {
|
|||
} // namespace simdjson
|
||||
|
||||
#endif // IS_ARM64
|
||||
#endif // SIMDJSON_SIMD_INPUT_ARM64_H
|
||||
#endif // SIMDJSON_ARM64_SIMD_INPUT_H
|
|
@ -1,13 +1,13 @@
|
|||
// From https://github.com/cyb70289/utf8/blob/master/lemire-neon.c
|
||||
// Adapted from https://github.com/lemire/fastvalidate-utf-8
|
||||
|
||||
#ifndef SIMDJSON_SIMDUTF8CHECK_ARM64_H
|
||||
#define SIMDJSON_SIMDUTF8CHECK_ARM64_H
|
||||
#ifndef SIMDJSON_ARM64_SIMDUTF8CHECK_H
|
||||
#define SIMDJSON_ARM64_SIMDUTF8CHECK_H
|
||||
|
||||
#if defined(_ARM_NEON) || defined(__aarch64__) || \
|
||||
(defined(_MSC_VER) && defined(_M_ARM64))
|
||||
|
||||
#include "simdjson/simdutf8check.h"
|
||||
#include "../simdutf8check.h"
|
||||
#include <arm_neon.h>
|
||||
#include <cinttypes>
|
||||
#include <cstddef>
|
||||
|
@ -31,7 +31,7 @@
|
|||
* U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
|
||||
*
|
||||
*/
|
||||
namespace simdjson {
|
||||
namespace simdjson::arm64 {
|
||||
|
||||
// all byte values must be no larger than 0xF4
|
||||
static inline void check_smaller_than_0xF4(int8x16_t current_bytes,
|
||||
|
@ -191,6 +191,12 @@ really_inline bool check_ascii_neon(simd_input<Architecture::ARM64> in) {
|
|||
return vget_lane_u64(result, 0) == 0;
|
||||
}
|
||||
|
||||
} // namespace simdjson::arm64
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
using namespace simdjson::arm64;
|
||||
|
||||
template <>
|
||||
struct utf8_checker<Architecture::ARM64> {
|
||||
int8x16_t has_error{};
|
|
@ -1,16 +1,19 @@
|
|||
#ifndef SIMDJSON_STAGE1_FIND_MARKS_ARM64_H
|
||||
#define SIMDJSON_STAGE1_FIND_MARKS_ARM64_H
|
||||
#ifndef SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
|
||||
#define SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
|
||||
|
||||
#include "simdjson/simd_input_arm64.h"
|
||||
#include "simdjson/simdutf8check_arm64.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#ifdef IS_ARM64
|
||||
namespace simdjson {
|
||||
|
||||
template <>
|
||||
really_inline uint64_t
|
||||
compute_quote_mask<Architecture::ARM64>(uint64_t quote_bits) {
|
||||
#include "arm64/architecture.h"
|
||||
#include "arm64/simd_input.h"
|
||||
#include "arm64/simdutf8check.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
|
||||
namespace simdjson::arm64 {
|
||||
|
||||
static really_inline uint64_t compute_quote_mask(uint64_t quote_bits) {
|
||||
|
||||
#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension
|
||||
return vmull_p64(-1ULL, quote_bits);
|
||||
#else
|
||||
|
@ -18,9 +21,8 @@ compute_quote_mask<Architecture::ARM64>(uint64_t quote_bits) {
|
|||
#endif
|
||||
}
|
||||
|
||||
template <>
|
||||
really_inline void find_whitespace_and_structurals<Architecture::ARM64>(
|
||||
simd_input<Architecture::ARM64> in, uint64_t &whitespace,
|
||||
static really_inline void find_whitespace_and_structurals(
|
||||
simd_input<ARCHITECTURE> in, uint64_t &whitespace,
|
||||
uint64_t &structurals) {
|
||||
const uint8x16_t low_nibble_mask =
|
||||
(uint8x16_t){16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0};
|
||||
|
@ -66,7 +68,20 @@ really_inline void find_whitespace_and_structurals<Architecture::ARM64>(
|
|||
uint8x16_t tmp_ws_3 = vtstq_u8(v_3, whitespace_shufti_mask);
|
||||
whitespace = neon_movemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3);
|
||||
}
|
||||
|
||||
#include "generic/stage1_find_marks_flatten.h"
|
||||
#include "generic/stage1_find_marks.h"
|
||||
|
||||
} // namespace simdjson::arm64
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
template <>
|
||||
int find_structural_bits<Architecture::ARM64>(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
|
||||
return arm64::find_structural_bits(buf, len, pj);
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // IS_ARM64
|
||||
#endif // SIMDJSON_STAGE1_FIND_MARKS_ARM64_H
|
||||
#endif // SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
|
|
@ -0,0 +1,30 @@
|
|||
#ifndef SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
|
||||
#define SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#ifdef IS_ARM64
|
||||
|
||||
#include "simdjson/stage2_build_tape.h"
|
||||
#include "arm64/architecture.h"
|
||||
#include "arm64/stringparsing.h"
|
||||
|
||||
// The generic stage-2 source is textually included inside the arm64
// namespace, so everything it defines becomes a member of simdjson::arm64.
namespace simdjson::arm64 {

#include "generic/stage2_build_tape.h"

} // namespace simdjson::arm64

namespace simdjson {

// Stage-2 entry point for ARM64: forwards the public template specialization
// to the arm64-namespace implementation included above.
template <>
WARN_UNUSED int unified_machine<Architecture::ARM64>(const uint8_t *buf,
                                                     size_t len,
                                                     ParsedJson &pj) {
  return arm64::unified_machine(buf, len, pj);
}

} // namespace simdjson
|
||||
|
||||
#endif // IS_ARM64
|
||||
|
||||
#endif // SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
|
|
@ -1,14 +1,15 @@
|
|||
#ifndef SIMDJSON_STRINGPARSING_ARM64_H
|
||||
#define SIMDJSON_STRINGPARSING_ARM64_H
|
||||
#ifndef SIMDJSON_ARM64_STRINGPARSING_H
|
||||
#define SIMDJSON_ARM64_STRINGPARSING_H
|
||||
|
||||
#include "simdjson/stringparsing.h"
|
||||
#include "../stringparsing.h"
|
||||
|
||||
#ifdef IS_ARM64
|
||||
namespace simdjson {
|
||||
template <>
|
||||
really_inline parse_string_helper
|
||||
find_bs_bits_and_quote_bits<Architecture::ARM64>(const uint8_t *src,
|
||||
uint8_t *dst) {
|
||||
|
||||
#include "arm64/architecture.h"
|
||||
|
||||
namespace simdjson::arm64 {
|
||||
|
||||
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
|
||||
// this can read up to 31 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(2 * sizeof(uint8x16_t) - 1 <= SIMDJSON_PADDING);
|
||||
|
@ -39,15 +40,13 @@ find_bs_bits_and_quote_bits<Architecture::ARM64>(const uint8_t *src,
|
|||
vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 0), // bs_bits
|
||||
vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 1) // quote_bits
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
#include "generic/stringparsing.h"
|
||||
|
||||
#define TARGETED_ARCHITECTURE Architecture::ARM64
|
||||
#define TARGETED_REGION TARGET_ARM64
|
||||
#include "simdjson/stringparsing_common.h"
|
||||
#undef TARGETED_ARCHITECTURE
|
||||
#undef TARGETED_REGION
|
||||
}
|
||||
// namespace simdjson::arm64
|
||||
|
||||
#endif // IS_ARM64
|
||||
#endif
|
|
@ -1,14 +1,8 @@
|
|||
// This file contains the common code every implementation uses in stage1
|
||||
// It is intended to be included multiple times and compiled multiple times
|
||||
// We assume the file in which it is include already includes
|
||||
// We assume the file in which it is included already includes
|
||||
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
|
||||
|
||||
#ifdef TARGETED_ARCHITECTURE
|
||||
#ifdef TARGETED_REGION
|
||||
|
||||
TARGETED_REGION
|
||||
namespace simdjson {
|
||||
|
||||
// return a bitvector indicating where we have characters that end an odd-length
|
||||
// sequence of backslashes (and thus change the behavior of the next character
|
||||
// to follow). An even-length sequence of backslashes, and, for that matter, the
|
||||
|
@ -18,9 +12,8 @@ namespace simdjson {
|
|||
// indicate whether we end an iteration on an odd-length sequence of
|
||||
// backslashes, which modifies our subsequent search for odd-length
|
||||
// sequences of backslashes in an obvious way.
|
||||
template <>
|
||||
really_inline uint64_t find_odd_backslash_sequences<TARGETED_ARCHITECTURE>(
|
||||
simd_input<TARGETED_ARCHITECTURE> in,
|
||||
really_inline uint64_t find_odd_backslash_sequences(
|
||||
simd_input<ARCHITECTURE> in,
|
||||
uint64_t &prev_iter_ends_odd_backslash) {
|
||||
const uint64_t even_bits = 0x5555555555555555ULL;
|
||||
const uint64_t odd_bits = ~even_bits;
|
||||
|
@ -66,14 +59,13 @@ really_inline uint64_t find_odd_backslash_sequences<TARGETED_ARCHITECTURE>(
|
|||
// Note that we don't do any error checking to see if we have backslash
|
||||
// sequences outside quotes; these
|
||||
// backslash sequences (of any length) will be detected elsewhere.
|
||||
template <>
|
||||
really_inline uint64_t find_quote_mask_and_bits<TARGETED_ARCHITECTURE>(
|
||||
simd_input<TARGETED_ARCHITECTURE> in, uint64_t odd_ends,
|
||||
really_inline uint64_t find_quote_mask_and_bits(
|
||||
simd_input<ARCHITECTURE> in, uint64_t odd_ends,
|
||||
uint64_t &prev_iter_inside_quote, uint64_t &quote_bits,
|
||||
uint64_t &error_mask) {
|
||||
quote_bits = in.eq('"');
|
||||
quote_bits = quote_bits & ~odd_ends;
|
||||
uint64_t quote_mask = compute_quote_mask<TARGETED_ARCHITECTURE>(quote_bits);
|
||||
uint64_t quote_mask = compute_quote_mask(quote_bits);
|
||||
quote_mask ^= prev_iter_inside_quote;
|
||||
/* All Unicode characters may be placed within the
|
||||
* quotation marks, except for the characters that MUST be escaped:
|
||||
|
@ -90,33 +82,65 @@ really_inline uint64_t find_quote_mask_and_bits<TARGETED_ARCHITECTURE>(
|
|||
return quote_mask;
|
||||
}
|
||||
|
||||
// Combines the raw structural bits of one 64-byte chunk with the quote
// information and adds "pseudo-structural" characters.
//
//   structurals  - raw structural-character bits for this chunk
//   whitespace   - whitespace bits for this chunk
//   quote_mask   - bits set for positions considered inside a quoted string
//   quote_bits   - bits set at the quote characters themselves
//   prev_iter_ends_pseudo_pred - in: whether the previous chunk ended on a
//                  qualified predecessor (0 or 1); out: the same flag for
//                  this chunk, to be fed into the next call
//
// Returns the final structural bitmask for the chunk.
really_inline uint64_t finalize_structurals(
    uint64_t structurals, uint64_t whitespace, uint64_t quote_mask,
    uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) {
  const uint64_t outside_quotes = ~quote_mask;

  // Drop anything inside quotes, then add the real quote bits back so that
  // later passes can quickly traverse the strings.
  const uint64_t marked = (structurals & outside_quotes) | quote_bits;

  // A pseudo-structural character is a non-whitespace character outside
  // quotes whose predecessor is whitespace or a structural character. This
  // gives later passes the first character of every run of non-whitespace
  // (atoms such as true/false/null, numbers, ...).
  //
  // A qualified predecessor is anything that may sit one position before a
  // pseudo-structural character.
  const uint64_t predecessors = marked | whitespace;

  // Shift by one position, carrying in the last bit of the previous chunk
  // and carrying out the last bit of this one.
  const uint64_t carry_in = prev_iter_ends_pseudo_pred;
  prev_iter_ends_pseudo_pred = predecessors >> 63;
  const uint64_t follows_predecessor = (predecessors << 1) | carry_in;

  const uint64_t pseudo_structurals =
      follows_predecessor & ~(whitespace | quote_mask);

  // The closing quotes have served their purpose: they are set in quote_bits
  // but clear in quote_mask, so switch them off in the result.
  const uint64_t closing_quotes = quote_bits & outside_quotes;

  return (marked | pseudo_structurals) & ~closing_quotes;
}
|
||||
|
||||
// Find structural bits in a 64-byte chunk.
|
||||
really_inline void find_structural_bits_64(
|
||||
const uint8_t *buf, size_t idx, uint32_t *base_ptr, uint32_t &base,
|
||||
uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote,
|
||||
uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals,
|
||||
uint64_t &error_mask,
|
||||
utf8_checker<TARGETED_ARCHITECTURE> &utf8_state) {
|
||||
simd_input<TARGETED_ARCHITECTURE> in(buf);
|
||||
utf8_checker<ARCHITECTURE> &utf8_state) {
|
||||
simd_input<ARCHITECTURE> in(buf);
|
||||
utf8_state.check_next_input(in);
|
||||
/* detect odd sequences of backslashes */
|
||||
uint64_t odd_ends = find_odd_backslash_sequences<TARGETED_ARCHITECTURE>(
|
||||
uint64_t odd_ends = find_odd_backslash_sequences(
|
||||
in, prev_iter_ends_odd_backslash);
|
||||
|
||||
/* detect insides of quote pairs ("quote_mask") and also our quote_bits
|
||||
* themselves */
|
||||
uint64_t quote_bits;
|
||||
uint64_t quote_mask = find_quote_mask_and_bits<TARGETED_ARCHITECTURE>(
|
||||
uint64_t quote_mask = find_quote_mask_and_bits(
|
||||
in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);
|
||||
|
||||
/* take the previous iterations structural bits, not our current
|
||||
* iteration,
|
||||
* and flatten */
|
||||
flatten_bits<TARGETED_ARCHITECTURE>(base_ptr, base, idx, structurals);
|
||||
flatten_bits(base_ptr, base, idx, structurals);
|
||||
|
||||
uint64_t whitespace;
|
||||
find_whitespace_and_structurals<TARGETED_ARCHITECTURE>(in, whitespace,
|
||||
structurals);
|
||||
find_whitespace_and_structurals(in, whitespace, structurals);
|
||||
|
||||
/* fixup structurals to reflect quotes and add pseudo-structural
|
||||
* characters */
|
||||
|
@ -124,9 +148,7 @@ really_inline void find_structural_bits_64(
|
|||
quote_bits, prev_iter_ends_pseudo_pred);
|
||||
}
|
||||
|
||||
template <>
|
||||
int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
|
||||
ParsedJson &pj) {
|
||||
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
|
||||
if (len > pj.byte_capacity) {
|
||||
std::cerr << "Your ParsedJson object only supports documents up to "
|
||||
<< pj.byte_capacity << " bytes but you are trying to process "
|
||||
|
@ -135,7 +157,7 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
|
|||
}
|
||||
uint32_t *base_ptr = pj.structural_indexes;
|
||||
uint32_t base = 0;
|
||||
utf8_checker<TARGETED_ARCHITECTURE> utf8_state;
|
||||
utf8_checker<ARCHITECTURE> utf8_state;
|
||||
|
||||
/* we have padded the input out to 64 byte multiple with the remainder
|
||||
* being zeros persistent state across loop does the last iteration end
|
||||
|
@ -194,7 +216,7 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
|
|||
|
||||
/* finally, flatten out the remaining structurals from the last iteration
|
||||
*/
|
||||
flatten_bits<TARGETED_ARCHITECTURE>(base_ptr, base, idx, structurals);
|
||||
flatten_bits(base_ptr, base, idx, structurals);
|
||||
|
||||
pj.n_structural_indexes = base;
|
||||
/* a valid JSON file cannot have zero structural indexes - we should have
|
||||
|
@ -217,13 +239,3 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
|
|||
}
|
||||
return utf8_state.errors();
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
|
||||
#else
|
||||
#error TARGETED_REGION must be specified before including.
|
||||
#endif // TARGETED_REGION
|
||||
#else
|
||||
#error TARGETED_ARCHITECTURE must be specified before including.
|
||||
#endif // TARGETED_ARCHITECTURE
|
|
@ -3,20 +3,12 @@
|
|||
// We assume the file in which it is included already includes
|
||||
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
|
||||
|
||||
#ifdef TARGETED_ARCHITECTURE
|
||||
#ifdef TARGETED_REGION
|
||||
|
||||
TARGETED_REGION
|
||||
namespace simdjson {
|
||||
|
||||
#ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking
|
||||
//
|
||||
|
||||
// This is just a naive implementation. It should normally be
|
||||
// disabled, but can be used for research purposes to compare
|
||||
// against our optimized version.
|
||||
template <>
|
||||
really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint32_t &base,
|
||||
uint32_t idx, uint64_t bits) {
|
||||
static really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
|
||||
uint32_t *out_ptr = base_ptr + base;
|
||||
idx -= 64;
|
||||
while (bits != 0) {
|
||||
|
@ -27,15 +19,14 @@ really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint3
|
|||
base = (out_ptr - base_ptr);
|
||||
}
|
||||
|
||||
#else
|
||||
#else // SIMDJSON_NAIVE_FLATTEN
|
||||
|
||||
// flatten out values in 'bits' assuming that they are to have values of idx
|
||||
// plus their position in the bitvector, and store these indexes at
|
||||
// base_ptr[base] incrementing base as we go
|
||||
// will potentially store extra values beyond end of valid bits, so base_ptr
|
||||
// needs to be large enough to handle this
|
||||
template<>
|
||||
really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint32_t &base,
|
||||
uint32_t idx, uint64_t bits) {
|
||||
static really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
|
||||
// In some instances, the next branch is expensive because it is mispredicted.
|
||||
// Unfortunately, in other cases,
|
||||
// it helps tremendously.
|
||||
|
@ -96,13 +87,3 @@ really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint3
|
|||
base = next_base;
|
||||
}
|
||||
#endif // SIMDJSON_NAIVE_FLATTEN
|
||||
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
|
||||
#else
|
||||
#error TARGETED_REGION must be specified before including.
|
||||
#endif // TARGETED_REGION
|
||||
#else
|
||||
#error TARGETED_ARCHITECTURE must be specified before including.
|
||||
#endif // TARGETED_ARCHITECTURE
|
|
@ -3,12 +3,6 @@
|
|||
// We assume the file in which it is included already includes
|
||||
// "simdjson/stage2_build_tape.h" (this simplifies amalgamation)
|
||||
|
||||
#ifdef TARGETED_ARCHITECTURE
|
||||
#ifdef TARGETED_REGION
|
||||
|
||||
TARGETED_REGION
|
||||
namespace simdjson {
|
||||
|
||||
// this macro reads the next structural character, updating idx, i and c.
|
||||
#define UPDATE_CHAR() \
|
||||
{ \
|
||||
|
@ -41,10 +35,8 @@ namespace simdjson {
|
|||
* The JSON is parsed to a tape, see the accompanying tape.md file
|
||||
* for documentation.
|
||||
***********/
|
||||
template <>
|
||||
WARN_UNUSED int
|
||||
unified_machine<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
|
||||
ParsedJson &pj) {
|
||||
unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
uint32_t i = 0; /* index of the structural character (0,1,2,3...) */
|
||||
uint32_t idx; /* location of the structural character in the input (buf) */
|
||||
uint8_t c; /* used to track the (structural) character we are looking at,
|
||||
|
@ -100,7 +92,7 @@ unified_machine<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
|
|||
* https://tools.ietf.org/html/rfc8259
|
||||
* #ifdef SIMDJSON_ALLOWANYTHINGINROOT */
|
||||
case '"': {
|
||||
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
|
@ -229,7 +221,7 @@ object_begin:
|
|||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
goto object_key_state;
|
||||
|
@ -248,7 +240,7 @@ object_key_state:
|
|||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
|
@ -333,7 +325,7 @@ object_continue:
|
|||
if (c != '"') {
|
||||
goto fail;
|
||||
} else {
|
||||
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
goto object_key_state;
|
||||
|
@ -367,7 +359,7 @@ main_array_switch:
|
|||
* on paths that can accept a close square brace (post-, and at start) */
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
|
@ -523,13 +515,3 @@ fail:
|
|||
pj.error_code = simdjson::TAPE_ERROR;
|
||||
return pj.error_code;
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
|
||||
#else
|
||||
#error TARGETED_REGION must be specified before including.
|
||||
#endif // TARGETED_REGION
|
||||
#else
|
||||
#error TARGETED_ARCHITECTURE must be specified before including.
|
||||
#endif // TARGETED_ARCHITECTURE
|
|
@ -1,28 +1,19 @@
|
|||
// This file contains the common code every implementation uses
|
||||
// It is intended to be included multiple times and compiled multiple times
|
||||
// We assume the file in which it is included already includes
|
||||
// "simdjson/stringparsing.h" (this simplifies amalgation)
|
||||
// "stringparsing.h" (this simplifies amalgamation)
|
||||
|
||||
#ifdef TARGETED_ARCHITECTURE
|
||||
#ifdef TARGETED_REGION
|
||||
|
||||
TARGETED_REGION
|
||||
namespace simdjson {
|
||||
|
||||
template <>
|
||||
WARN_UNUSED
|
||||
really_inline bool
|
||||
parse_string<TARGETED_ARCHITECTURE>(UNUSED const uint8_t *buf,
|
||||
UNUSED size_t len, ParsedJson &pj,
|
||||
UNUSED const uint32_t depth,
|
||||
UNUSED uint32_t offset) {
|
||||
WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf,
|
||||
UNUSED size_t len, ParsedJson &pj,
|
||||
UNUSED const uint32_t depth,
|
||||
UNUSED uint32_t offset) {
|
||||
pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"');
|
||||
const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */
|
||||
uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t);
|
||||
const uint8_t *const start_of_string = dst;
|
||||
while (1) {
|
||||
parse_string_helper helper =
|
||||
find_bs_bits_and_quote_bits<TARGETED_ARCHITECTURE>(src, dst);
|
||||
find_bs_bits_and_quote_bits(src, dst);
|
||||
if (((helper.bs_bits - 1) & helper.quote_bits) != 0) {
|
||||
/* we encountered quotes first. Move dst to point to quotes and exit
|
||||
*/
|
||||
|
@ -80,7 +71,7 @@ WARN_UNUSED
|
|||
} else {
|
||||
/* they are the same. Since they can't co-occur, it means we
|
||||
* encountered neither. */
|
||||
if constexpr (TARGETED_ARCHITECTURE == Architecture::WESTMERE) {
|
||||
if constexpr (ARCHITECTURE == Architecture::WESTMERE) {
|
||||
src += 16;
|
||||
dst += 16;
|
||||
} else {
|
||||
|
@ -92,13 +83,3 @@ WARN_UNUSED
|
|||
/* can't be reached */
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
|
||||
#else
|
||||
#error TARGETED_REGION must be specified before including.
|
||||
#endif // TARGETED_REGION
|
||||
#else
|
||||
#error TARGETED_ARCHITECTURE must be specified before including.
|
||||
#endif // TARGETED_ARCHITECTURE
|
|
@ -0,0 +1,20 @@
|
|||
#ifndef SIMDJSON_HASWELL_ARCHITECTURE_H
|
||||
#define SIMDJSON_HASWELL_ARCHITECTURE_H
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
#include "simdjson/simdjson.h"
|
||||
|
||||
|
||||
namespace simdjson::haswell {
|
||||
|
||||
static const Architecture ARCHITECTURE = Architecture::HASWELL;
|
||||
|
||||
} // namespace simdjson::haswell
|
||||
|
||||
|
||||
#endif // IS_X86_64
|
||||
|
||||
#endif // SIMDJSON_HASWELL_ARCHITECTURE_H
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef SIMDJSON_SIMD_INPUT_HASWELL_H
|
||||
#define SIMDJSON_SIMD_INPUT_HASWELL_H
|
||||
#ifndef SIMDJSON_HASWELL_SIMD_INPUT_H
|
||||
#define SIMDJSON_HASWELL_SIMD_INPUT_H
|
||||
|
||||
#include "simdjson/simd_input.h"
|
||||
#include "../simd_input.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
|
@ -42,4 +42,4 @@ struct simd_input<Architecture::HASWELL> {
|
|||
UNTARGET_REGION
|
||||
|
||||
#endif // IS_X86_64
|
||||
#endif // SIMDJSON_SIMD_INPUT_HASWELL_H
|
||||
#endif // SIMDJSON_HASWELL_SIMD_INPUT_H
|
|
@ -1,8 +1,8 @@
|
|||
#ifndef SIMDJSON_SIMDUTF8CHECK_HASWELL_H
|
||||
#define SIMDJSON_SIMDUTF8CHECK_HASWELL_H
|
||||
#ifndef SIMDJSON_HASWELL_SIMDUTF8CHECK_H
|
||||
#define SIMDJSON_HASWELL_SIMDUTF8CHECK_H
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
#include "simdjson/simdutf8check.h"
|
||||
#include "../simdutf8check.h"
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
@ -28,7 +28,8 @@
|
|||
// all byte values must be no larger than 0xF4
|
||||
|
||||
TARGET_HASWELL
|
||||
namespace simdjson {
|
||||
namespace simdjson::haswell {
|
||||
|
||||
static inline __m256i push_last_byte_of_a_to_b(__m256i a, __m256i b) {
|
||||
return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 15);
|
||||
}
|
||||
|
@ -192,6 +193,14 @@ avx_check_utf8_bytes(__m256i current_bytes,
|
|||
return pb;
|
||||
}
|
||||
|
||||
}; // namespace simdjson::haswell
|
||||
UNTARGET_REGION // haswell
|
||||
|
||||
TARGET_HASWELL
|
||||
namespace simdjson {
|
||||
|
||||
using namespace simdjson::haswell;
|
||||
|
||||
template <>
|
||||
struct utf8_checker<Architecture::HASWELL> {
|
||||
__m256i has_error;
|
||||
|
@ -230,7 +239,7 @@ struct utf8_checker<Architecture::HASWELL> {
|
|||
}
|
||||
}; // struct utf8_checker
|
||||
|
||||
} // namespace simdjson
|
||||
}; // namespace simdjson
|
||||
UNTARGET_REGION // haswell
|
||||
|
||||
#endif // IS_X86_64
|
|
@ -0,0 +1,185 @@
|
|||
#ifndef SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
|
||||
#define SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
#include "haswell/architecture.h"
|
||||
#include "haswell/simd_input.h"
|
||||
#include "haswell/simdutf8check.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
|
||||
TARGET_HASWELL
|
||||
namespace simdjson::haswell {
|
||||
|
||||
// Computes the quote mask from the quote bits with a carry-less multiply by
// an all-ones operand: bit i of the low 64 bits of the product is the XOR of
// quote_bits[0..i].
static really_inline uint64_t compute_quote_mask(uint64_t quote_bits) {
  // There should be no such thing as a processor supporting avx2
  // but not clmul.
  const __m128i quotes = _mm_set_epi64x(0ULL, quote_bits);
  const __m128i all_ones = _mm_set1_epi8(0xFFu);
  return _mm_cvtsi128_si64(_mm_clmulepi64_si128(quotes, all_ones, 0));
}
|
||||
|
||||
// Classifies the 64 input bytes held in `in` (two 256-bit halves, in.lo and
// in.hi). On return, bit i of `whitespace` is set when byte i is a space,
// tab, line feed or carriage return, and bit i of `structurals` is set when
// byte i is one of { } [ ] : ,
static really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
  uint64_t &whitespace, uint64_t &structurals) {

#ifdef SIMDJSON_NAIVE_STRUCTURAL
  // You should never need this naive approach, but it can be useful
  // for research purposes: compare against each character in turn.
  const char structural_chars[] = {0x7b, 0x7d, 0x5b, 0x5d, 0x3a, 0x2c}; // { } [ ] : ,
  __m256i structural_lo = _mm256_setzero_si256();
  __m256i structural_hi = _mm256_setzero_si256();
  for (const char c : structural_chars) {
    const __m256i needle = _mm256_set1_epi8(c);
    structural_lo = _mm256_or_si256(structural_lo, _mm256_cmpeq_epi8(in.lo, needle));
    structural_hi = _mm256_or_si256(structural_hi, _mm256_cmpeq_epi8(in.hi, needle));
  }
  // The low half is zero-extended through uint32_t; the high half may be
  // sign-extended, but the shift by 32 discards the extension bits.
  const uint64_t structural_bits_lo = static_cast<uint32_t>(_mm256_movemask_epi8(structural_lo));
  const uint64_t structural_bits_hi = _mm256_movemask_epi8(structural_hi);
  structurals = (structural_bits_lo | (structural_bits_hi << 32));

  const char whitespace_chars[] = {0x20, 0x0a, 0x09, 0x0d}; // space, LF, TAB, CR
  __m256i whitespace_lo = _mm256_setzero_si256();
  __m256i whitespace_hi = _mm256_setzero_si256();
  for (const char c : whitespace_chars) {
    const __m256i needle = _mm256_set1_epi8(c);
    whitespace_lo = _mm256_or_si256(whitespace_lo, _mm256_cmpeq_epi8(in.lo, needle));
    whitespace_hi = _mm256_or_si256(whitespace_hi, _mm256_cmpeq_epi8(in.hi, needle));
  }
  const uint64_t whitespace_bits_lo = static_cast<uint32_t>(_mm256_movemask_epi8(whitespace_lo));
  const uint64_t whitespace_bits_hi = _mm256_movemask_epi8(whitespace_hi);
  whitespace = (whitespace_bits_lo | (whitespace_bits_hi << 32));
  // end of naive approach

#else // SIMDJSON_NAIVE_STRUCTURAL
  // Table-driven approach: each byte selects a candidate from a 16-entry
  // table via a byte shuffle and is then compared against that candidate.
  // clang-format off
  const __m256i structural_lookup =
      _mm256_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123,
                       44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
  const __m256i whitespace_lookup = _mm256_setr_epi8(
      32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100,
      32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100);
  // clang-format on
  const __m256i index_offset = _mm256_set1_epi8(0xd4u);
  const __m256i or_mask = _mm256_set1_epi8(32);

  // Whitespace: a byte is whitespace when the table entry it selects is
  // equal to the byte itself.
  const __m256i whitespace_lo =
      _mm256_cmpeq_epi8(in.lo, _mm256_shuffle_epi8(whitespace_lookup, in.lo));
  const __m256i whitespace_hi =
      _mm256_cmpeq_epi8(in.hi, _mm256_shuffle_epi8(whitespace_lookup, in.hi));
  // The low half is zero-extended through uint32_t; the high half may be
  // sign-extended, but the shift by 32 discards the extension bits.
  const uint64_t whitespace_bits_lo = static_cast<uint32_t>(_mm256_movemask_epi8(whitespace_lo));
  const uint64_t whitespace_bits_hi = _mm256_movemask_epi8(whitespace_hi);
  whitespace = (whitespace_bits_lo | (whitespace_bits_hi << 32));

  // Structurals: the table is indexed with (byte + 0xd4) and the entry is
  // compared with (byte | 0x20); the OR maps '[' / ']' onto '{' / '}' so
  // both bracket pairs share table entries.
  const __m256i candidate_lo =
      _mm256_shuffle_epi8(structural_lookup, _mm256_add_epi8(index_offset, in.lo));
  const __m256i candidate_hi =
      _mm256_shuffle_epi8(structural_lookup, _mm256_add_epi8(index_offset, in.hi));
  const __m256i structural_lo =
      _mm256_cmpeq_epi8(_mm256_or_si256(in.lo, or_mask), candidate_lo);
  const __m256i structural_hi =
      _mm256_cmpeq_epi8(_mm256_or_si256(in.hi, or_mask), candidate_hi);

  const uint64_t structural_bits_lo = static_cast<uint32_t>(_mm256_movemask_epi8(structural_lo));
  const uint64_t structural_bits_hi = _mm256_movemask_epi8(structural_hi);
  structurals = (structural_bits_lo | (structural_bits_hi << 32));
#endif // else SIMDJSON_NAIVE_STRUCTURAL
}
|
||||
|
||||
// flatten out values in 'bits' assuming that they are are to have values of idx
|
||||
// plus their position in the bitvector, and store these indexes at
|
||||
// base_ptr[base] incrementing base as we go
|
||||
// will potentially store extra values beyond end of valid bits, so base_ptr
|
||||
// needs to be large enough to handle this
|
||||
static really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
|
||||
// In some instances, the next branch is expensive because it is mispredicted.
|
||||
// Unfortunately, in other cases,
|
||||
// it helps tremendously.
|
||||
if (bits == 0)
|
||||
return;
|
||||
uint32_t cnt = _mm_popcnt_u64(bits);
|
||||
uint32_t next_base = base + cnt;
|
||||
idx -= 64;
|
||||
base_ptr += base;
|
||||
{
|
||||
base_ptr[0] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[1] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[2] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[3] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[4] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[5] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[6] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[7] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr += 8;
|
||||
}
|
||||
// We hope that the next branch is easily predicted.
|
||||
if (cnt > 8) {
|
||||
base_ptr[0] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[1] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[2] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[3] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[4] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[5] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[6] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr[7] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr += 8;
|
||||
}
|
||||
if (cnt > 16) { // unluckly: we rarely get here
|
||||
// since it means having one structural or pseudo-structral element
|
||||
// every 4 characters (possible with inputs like "","","",...).
|
||||
do {
|
||||
base_ptr[0] = idx + trailing_zeroes(bits);
|
||||
bits = _blsr_u64(bits);
|
||||
base_ptr++;
|
||||
} while (bits != 0);
|
||||
}
|
||||
base = next_base;
|
||||
}
|
||||
|
||||
#include "generic/stage1_find_marks.h"
|
||||
|
||||
} // namespace simdjson::haswell
|
||||
UNTARGET_REGION
|
||||
|
||||
TARGET_HASWELL
|
||||
namespace simdjson {
|
||||
|
||||
template <>
|
||||
int find_structural_bits<Architecture::HASWELL>(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
|
||||
return haswell::find_structural_bits(buf, len, pj);
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
|
||||
#endif // IS_X86_64
|
||||
#endif // SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
|
|
@ -0,0 +1,34 @@
|
|||
#ifndef SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
|
||||
#define SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
#include "simdjson/stage2_build_tape.h"
|
||||
#include "haswell/architecture.h"
|
||||
#include "haswell/stringparsing.h"
|
||||
|
||||
TARGET_HASWELL
|
||||
namespace simdjson::haswell {
|
||||
|
||||
#include "generic/stage2_build_tape.h"
|
||||
|
||||
} // namespace simdjson::haswell
|
||||
UNTARGET_REGION
|
||||
|
||||
TARGET_HASWELL
|
||||
namespace simdjson {
|
||||
|
||||
template <>
|
||||
WARN_UNUSED int
|
||||
unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
return haswell::unified_machine(buf, len, pj);
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
|
||||
#endif // IS_X86_64
|
||||
|
||||
#endif // SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
|
|
@ -1,15 +1,16 @@
|
|||
#ifndef SIMDJSON_STRINGPARSING_HASWELL_H
|
||||
#define SIMDJSON_STRINGPARSING_HASWELL_H
|
||||
#ifndef SIMDJSON_HASWELL_STRINGPARSING_H
|
||||
#define SIMDJSON_HASWELL_STRINGPARSING_H
|
||||
|
||||
#include "simdjson/stringparsing.h"
|
||||
#include "../stringparsing.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
#include "haswell/architecture.h"
|
||||
|
||||
TARGET_HASWELL
|
||||
namespace simdjson {
|
||||
template <>
|
||||
really_inline parse_string_helper
|
||||
find_bs_bits_and_quote_bits<Architecture::HASWELL>(const uint8_t *src,
|
||||
uint8_t *dst) {
|
||||
namespace simdjson::haswell {
|
||||
|
||||
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
|
||||
// this can read up to 31 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(sizeof(__m256i) - 1 <= SIMDJSON_PADDING);
|
||||
|
@ -24,14 +25,11 @@ find_bs_bits_and_quote_bits<Architecture::HASWELL>(const uint8_t *src,
|
|||
static_cast<uint32_t>(_mm256_movemask_epi8(quote_mask)) // quote_bits
|
||||
};
|
||||
}
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
|
||||
#define TARGETED_ARCHITECTURE Architecture::HASWELL
|
||||
#define TARGETED_REGION TARGET_HASWELL
|
||||
#include "simdjson/stringparsing_common.h"
|
||||
#undef TARGETED_ARCHITECTURE
|
||||
#undef TARGETED_REGION
|
||||
#include "generic/stringparsing.h"
|
||||
|
||||
} // namespace simdjson::haswell
|
||||
UNTARGET_REGION
|
||||
|
||||
#endif // IS_X86_64
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
#define SIMDJSON_SIMDUTF8CHECK_H
|
||||
|
||||
#include "simdjson/simdjson.h"
|
||||
#include "simdjson/simd_input.h"
|
||||
#include "simd_input.h"
|
||||
|
||||
namespace simdjson {
|
||||
|
|
@ -1,34 +1,18 @@
|
|||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
namespace {
|
||||
// for when clmul is unavailable
|
||||
[[maybe_unused]] uint64_t portable_compute_quote_mask(uint64_t quote_bits) {
|
||||
uint64_t quote_mask = quote_bits ^ (quote_bits << 1);
|
||||
quote_mask = quote_mask ^ (quote_mask << 2);
|
||||
quote_mask = quote_mask ^ (quote_mask << 4);
|
||||
quote_mask = quote_mask ^ (quote_mask << 8);
|
||||
quote_mask = quote_mask ^ (quote_mask << 16);
|
||||
quote_mask = quote_mask ^ (quote_mask << 32);
|
||||
return quote_mask;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
#define TARGETED_ARCHITECTURE Architecture::HASWELL
|
||||
#define TARGETED_REGION TARGET_HASWELL
|
||||
#include "simdjson/stage1_find_marks_flatten_haswell.h"
|
||||
#include "simdjson/stage1_find_marks_haswell.h"
|
||||
#include "simdjson/stage1_find_marks_common.h"
|
||||
#undef TARGETED_ARCHITECTURE
|
||||
#undef TARGETED_REGION
|
||||
|
||||
#define TARGETED_ARCHITECTURE Architecture::WESTMERE
|
||||
#define TARGETED_REGION TARGET_WESTMERE
|
||||
#include "simdjson/stage1_find_marks_flatten_common.h"
|
||||
#include "simdjson/stage1_find_marks_westmere.h"
|
||||
#include "simdjson/stage1_find_marks_common.h"
|
||||
#undef TARGETED_ARCHITECTURE
|
||||
#undef TARGETED_REGION
|
||||
|
||||
#endif // IS_X86_64
|
||||
|
||||
#ifdef IS_ARM64
|
||||
|
||||
#define TARGETED_ARCHITECTURE Architecture::ARM64
|
||||
#define TARGETED_REGION TARGET_ARM64
|
||||
#include "simdjson/stage1_find_marks_flatten_common.h"
|
||||
#include "simdjson/stage1_find_marks_arm64.h"
|
||||
#include "simdjson/stage1_find_marks_common.h"
|
||||
#undef TARGETED_ARCHITECTURE
|
||||
#undef TARGETED_REGION
|
||||
|
||||
#endif // IS_ARM64
|
||||
#include "arm64/stage1_find_marks.h"
|
||||
#include "haswell/stage1_find_marks.h"
|
||||
#include "westmere/stage1_find_marks.h"
|
||||
|
|
|
@ -1,23 +1,3 @@
|
|||
#include "simdjson/stage2_build_tape.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
#define TARGETED_ARCHITECTURE Architecture::HASWELL
|
||||
#define TARGETED_REGION TARGET_HASWELL
|
||||
#include "simdjson/stage2_build_tape_common.h"
|
||||
#undef TARGETED_ARCHITECTURE
|
||||
#undef TARGETED_REGION
|
||||
|
||||
#define TARGETED_ARCHITECTURE Architecture::WESTMERE
|
||||
#define TARGETED_REGION TARGET_WESTMERE
|
||||
#include "simdjson/stage2_build_tape_common.h"
|
||||
#undef TARGETED_ARCHITECTURE
|
||||
#undef TARGETED_REGION
|
||||
#endif // IS_X86_64
|
||||
|
||||
#ifdef IS_ARM64
|
||||
#define TARGETED_ARCHITECTURE Architecture::ARM64
|
||||
#define TARGETED_REGION TARGET_ARM64
|
||||
#include "simdjson/stage2_build_tape_common.h"
|
||||
#undef TARGETED_ARCHITECTURE
|
||||
#undef TARGETED_REGION
|
||||
#endif // IS_ARM64
|
||||
#include "arm64/stage2_build_tape.h"
|
||||
#include "haswell/stage2_build_tape.h"
|
||||
#include "westmere/stage2_build_tape.h"
|
||||
|
|
|
@ -12,6 +12,7 @@ void found_bad_string(const uint8_t *buf);
|
|||
#endif
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// begin copypasta
|
||||
// These chars yield themselves: " \ /
|
||||
// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab
|
||||
|
@ -84,22 +85,6 @@ struct parse_string_helper {
|
|||
uint32_t quote_bits;
|
||||
};
|
||||
|
||||
// Finds where the backslashes and quotes are located.
|
||||
template <Architecture>
|
||||
parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src,
|
||||
uint8_t *dst);
|
||||
|
||||
template <Architecture T>
|
||||
WARN_UNUSED
|
||||
really_inline bool
|
||||
parse_string(UNUSED const uint8_t *buf, UNUSED size_t len, ParsedJson &pj,
|
||||
UNUSED const uint32_t depth, UNUSED uint32_t offset);
|
||||
|
||||
} // namespace simdjson
|
||||
|
||||
/// Now include the specializations:
|
||||
#include "simdjson/stringparsing_arm64.h"
|
||||
#include "simdjson/stringparsing_haswell.h"
|
||||
#include "simdjson/stringparsing_westmere.h"
|
||||
|
||||
#endif
|
||||
#endif // SIMDJSON_STRINGPARSING_H
|
|
@ -0,0 +1,19 @@
|
|||
#ifndef SIMDJSON_WESTMERE_ARCHITECTURE_H
|
||||
#define SIMDJSON_WESTMERE_ARCHITECTURE_H
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
#include "simdjson/simdjson.h"
|
||||
|
||||
namespace simdjson::westmere {
|
||||
|
||||
static const Architecture ARCHITECTURE = Architecture::WESTMERE;
|
||||
|
||||
} // namespace simdjson::westmere
|
||||
|
||||
|
||||
#endif // IS_X86_64
|
||||
|
||||
#endif // SIMDJSON_WESTMERE_ARCHITECTURE_H
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef SIMDJSON_SIMD_INPUT_WESTMERE_H
|
||||
#define SIMDJSON_SIMD_INPUT_WESTMERE_H
|
||||
#ifndef SIMDJSON_WESTMERE_SIMD_INPUT_H
|
||||
#define SIMDJSON_WESTMERE_SIMD_INPUT_H
|
||||
|
||||
#include "simdjson/simd_input.h"
|
||||
#include "../simd_input.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
|
@ -54,4 +54,4 @@ struct simd_input<Architecture::WESTMERE> {
|
|||
UNTARGET_REGION
|
||||
|
||||
#endif // IS_X86_64
|
||||
#endif // SIMDJSON_SIMD_INPUT_WESTMERE_H
|
||||
#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H
|
|
@ -1,8 +1,8 @@
|
|||
#ifndef SIMDJSON_SIMDUTF8CHECK_WESTMERE_H
|
||||
#define SIMDJSON_SIMDUTF8CHECK_WESTMERE_H
|
||||
#ifndef SIMDJSON_WESTMERE_SIMDUTF8CHECK_H
|
||||
#define SIMDJSON_WESTMERE_SIMDUTF8CHECK_H
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
#include "simdjson/simdutf8check.h"
|
||||
#include "simdutf8check.h"
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
@ -29,8 +29,7 @@
|
|||
|
||||
/********** sse code **********/
|
||||
TARGET_WESTMERE
|
||||
|
||||
namespace simdjson {
|
||||
namespace simdjson::westmere {
|
||||
|
||||
// all byte values must be no larger than 0xF4
|
||||
static inline void check_smaller_than_0xF4(__m128i current_bytes,
|
||||
|
@ -164,6 +163,14 @@ check_utf8_bytes(__m128i current_bytes, struct processed_utf_bytes *previous,
|
|||
return pb;
|
||||
}
|
||||
|
||||
} // namespace simdjson::westmere
|
||||
UNTARGET_REGION // westmere
|
||||
|
||||
TARGET_WESTMERE
|
||||
namespace simdjson {
|
||||
|
||||
using namespace simdjson::westmere;
|
||||
|
||||
template <>
|
||||
struct utf8_checker<Architecture::WESTMERE> {
|
||||
__m128i has_error = _mm_setzero_si128();
|
|
@ -1,30 +1,30 @@
|
|||
#ifndef SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H
|
||||
#define SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H
|
||||
#ifndef SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
|
||||
#define SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
|
||||
|
||||
#include "simdjson/simd_input_westmere.h"
|
||||
#include "simdjson/simdutf8check_westmere.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
TARGET_WESTMERE
|
||||
namespace simdjson {
|
||||
#include "westmere/architecture.h"
|
||||
#include "westmere/simd_input.h"
|
||||
#include "westmere/simdutf8check.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
|
||||
template <>
|
||||
really_inline uint64_t
|
||||
compute_quote_mask<Architecture::WESTMERE>(uint64_t quote_bits) {
|
||||
TARGET_WESTMERE
|
||||
namespace simdjson::westmere {
|
||||
|
||||
static really_inline uint64_t compute_quote_mask(uint64_t quote_bits) {
|
||||
return _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0));
|
||||
}
|
||||
|
||||
template <>
|
||||
really_inline void find_whitespace_and_structurals<Architecture::WESTMERE>(
|
||||
simd_input<Architecture::WESTMERE> in, uint64_t &whitespace,
|
||||
uint64_t &structurals) {
|
||||
static really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
|
||||
uint64_t &whitespace, uint64_t &structurals) {
|
||||
|
||||
const __m128i structural_table =
|
||||
_mm_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
|
||||
const __m128i white_table = _mm_setr_epi8(32, 100, 100, 100, 17, 100, 113, 2,
|
||||
100, 9, 10, 112, 100, 13, 100, 100);
|
||||
100, 9, 10, 112, 100, 13, 100, 100);
|
||||
const __m128i struct_offset = _mm_set1_epi8(0xd4u);
|
||||
const __m128i struct_mask = _mm_set1_epi8(32);
|
||||
|
||||
|
@ -66,11 +66,25 @@ really_inline void find_whitespace_and_structurals<Architecture::WESTMERE>(
|
|||
uint64_t structural_res_3 = _mm_movemask_epi8(struct4);
|
||||
|
||||
structurals = (structural_res_0 | (structural_res_1 << 16) |
|
||||
(structural_res_2 << 32) | (structural_res_3 << 48));
|
||||
(structural_res_2 << 32) | (structural_res_3 << 48));
|
||||
}
|
||||
|
||||
#include "generic/stage1_find_marks_flatten.h"
|
||||
#include "generic/stage1_find_marks.h"
|
||||
|
||||
} // namespace simdjson::westmere
|
||||
UNTARGET_REGION
|
||||
|
||||
TARGET_WESTMERE
|
||||
namespace simdjson {
|
||||
|
||||
template <>
|
||||
int find_structural_bits<Architecture::WESTMERE>(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
|
||||
return westmere::find_structural_bits(buf, len, pj);
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
|
||||
#endif // IS_X86_64
|
||||
#endif // SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H
|
||||
#endif // SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
|
|
@ -0,0 +1,34 @@
|
|||
#ifndef SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
|
||||
#define SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
#include "simdjson/stage2_build_tape.h"
|
||||
#include "westmere/architecture.h"
|
||||
#include "westmere/stringparsing.h"
|
||||
|
||||
TARGET_WESTMERE
|
||||
namespace simdjson::westmere {
|
||||
|
||||
#include "generic/stage2_build_tape.h"
|
||||
|
||||
} // namespace simdjson::westmere
|
||||
UNTARGET_REGION
|
||||
|
||||
TARGET_WESTMERE
|
||||
namespace simdjson {
|
||||
|
||||
template <>
|
||||
WARN_UNUSED int
|
||||
unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
return westmere::unified_machine(buf, len, pj);
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
|
||||
#endif // IS_X86_64
|
||||
|
||||
#endif // SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
|
|
@ -1,15 +1,16 @@
|
|||
#ifndef SIMDJSON_STRINGPARSING_WESTMERE_H
|
||||
#define SIMDJSON_STRINGPARSING_WESTMERE_H
|
||||
#ifndef SIMDJSON_WESTMERE_STRINGPARSING_H
|
||||
#define SIMDJSON_WESTMERE_STRINGPARSING_H
|
||||
|
||||
#include "simdjson/stringparsing.h"
|
||||
#include "../stringparsing.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
#include "westmere/architecture.h"
|
||||
|
||||
TARGET_WESTMERE
|
||||
namespace simdjson {
|
||||
template <>
|
||||
really_inline parse_string_helper
|
||||
find_bs_bits_and_quote_bits<Architecture::WESTMERE>(const uint8_t *src,
|
||||
uint8_t *dst) {
|
||||
namespace simdjson::westmere {
|
||||
|
||||
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
|
||||
// this can read up to 15 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
__m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
|
||||
|
@ -23,14 +24,11 @@ find_bs_bits_and_quote_bits<Architecture::WESTMERE>(const uint8_t *src,
|
|||
static_cast<uint32_t>(_mm_movemask_epi8(quote_mask)) // quote_bits
|
||||
};
|
||||
}
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
|
||||
#define TARGETED_ARCHITECTURE Architecture::WESTMERE
|
||||
#define TARGETED_REGION TARGET_WESTMERE
|
||||
#include "simdjson/stringparsing_common.h"
|
||||
#undef TARGETED_ARCHITECTURE
|
||||
#undef TARGETED_REGION
|
||||
#include "generic/stringparsing.h"
|
||||
|
||||
} // namespace simdjson::westmere
|
||||
UNTARGET_REGION
|
||||
|
||||
#endif // IS_X86_64
|
||||
|
Loading…
Reference in New Issue