Move architecture-specific headers to src/ (#287)

* Use namespaces instead of templates for stage1 impls

* Move stage1 implementation into the src/ directory

* Move architecture-specific code to src/
John Keiser 2019-08-21 04:59:49 -07:00 committed by Daniel Lemire
parent a1bff85263
commit 585f84a734
40 changed files with 5045 additions and 5094 deletions
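
The change replaces per-architecture template specializations with per-architecture namespaces: each src/<arch>/ directory defines ordinary functions inside a namespace such as simdjson::haswell (pulling the shared logic in by including src/generic/*.h inside that namespace), and a thin template specialization in namespace simdjson forwards the public entry point to it. A minimal sketch of the pattern, with made-up names rather than the real simdjson headers:

// Sketch only (hypothetical names): the "namespaces instead of templates"
// dispatch pattern; the real code lives in src/<arch>/*.h and src/generic/*.h.
#include <cstddef>
#include <cstdint>
#include <cstdio>

enum class Architecture { HASWELL, ARM64 };

// The public header keeps a single template declaration...
template <Architecture A>
int find_structural_bits(const uint8_t *buf, size_t len);

// ...while each architecture provides an ordinary function in its own
// namespace (in the real tree, by including the generic implementation
// inside namespace simdjson::haswell, simdjson::arm64, ...).
namespace demo_haswell {
static int find_structural_bits(const uint8_t *buf, size_t len) {
  (void)buf;
  return static_cast<int>(len); // stand-in for the SIMD implementation
}
} // namespace demo_haswell

// A thin specialization forwards the templated entry point into the namespace.
template <>
int find_structural_bits<Architecture::HASWELL>(const uint8_t *buf, size_t len) {
  return demo_haswell::find_structural_bits(buf, len);
}

int main() {
  const char json[] = "{}";
  std::printf("%d\n", find_structural_bits<Architecture::HASWELL>(
                          reinterpret_cast<const uint8_t *>(json), 2));
  return 0;
}

In the real tree the architecture namespaces are additionally wrapped in target-region macros such as TARGET_HASWELL / UNTARGET_REGION (and the ARM64 and Westmere equivalents) so each implementation can be compiled with its own instruction set inside a single binary.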

.gitignore

@ -7,10 +7,14 @@
# Build outputs (TODO build to a subdir so we can exclude that instead)
/allparserscheckfile
/basictests
/benchmark/parse
/benchmark/perfdiff
/benchmark/statisticalmodel
/json2json
/jsoncheck
/jsonpointer
/jsonstats
/libsimdjson.so*
/minify
/numberparsingcheck
/parse
@ -25,8 +29,33 @@
/simdjson.h
/singleheader/amalgamation_demo
/singleheader/demo
/tests/basictests
/tests/jsoncheck
/tests/pointercheck
/tools/json2json
/tools/jsonstats
/tools/minify
# CMake ignore from https://github.com/github/gitignore/blob/master/CMake.gitignore
# Generic from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake
_deps
# CMake files that may be specific to our installation
/CPackConfig.cmake
/CPackSourceConfig.cmake
# We check in a custom version of root Makefile that is not generated by CMake
!/Makefile
# C++ ignore from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore
# Prerequisites
*.d


@ -22,7 +22,7 @@ else
ARCHFLAGS ?= -msse4.2 -mpclmul # lowest supported feature set?
endif
CXXFLAGS = $(ARCHFLAGS) -std=c++17 -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux $(EXTRAFLAGS)
CXXFLAGS = $(ARCHFLAGS) -std=c++17 -Wall -Wextra -Wshadow -Iinclude -Isrc -Ibenchmark/linux $(EXTRAFLAGS)
CFLAGS = $(ARCHFLAGS) -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src $(EXTRAFLAGS)
@ -63,7 +63,11 @@ TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck pointercheck
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile allparsingcompetition
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
HEADERS= include/simdjson/simdutf8check_haswell.h include/simdjson/simdutf8check_westmere.h include/simdjson/simdutf8check_arm64.h include/simdjson/stringparsing.h include/simdjson/stringparsing_arm64.h include/simdjson/stringparsing_haswell.h include/simdjson/stringparsing_westmere.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage1_find_marks_arm64.h include/simdjson/stage1_find_marks_haswell.h include/simdjson/stage1_find_marks_westmere.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h include/simdjson/stage1_find_marks_flatten_common.h include/simdjson/stage1_find_marks_flatten_haswell.h
# Load headers and sources
LIBHEADERS=src/simd_input.h src/simdutf8check.h src/stringparsing.h src/arm64/architecture.h src/arm64/simd_input.h src/arm64/simdutf8check.h src/arm64/stage1_find_marks.h src/arm64/stage2_build_tape.h src/arm64/stringparsing.h src/generic/stage1_find_marks_flatten.h src/generic/stage1_find_marks.h src/generic/stage2_build_tape.h src/generic/stringparsing.h src/haswell/architecture.h src/haswell/simd_input.h src/haswell/simdutf8check.h src/haswell/stage1_find_marks.h src/haswell/stage2_build_tape.h src/haswell/stringparsing.h src/westmere/architecture.h src/westmere/simd_input.h src/westmere/simdutf8check.h src/westmere/stage1_find_marks.h src/westmere/stage2_build_tape.h src/westmere/stringparsing.h
PUBHEADERS=include/simdjson/common_defs.h include/simdjson/isadetection.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/numberparsing.h include/simdjson/padded_string.h include/simdjson/parsedjson.h include/simdjson/parsedjsoniterator.h include/simdjson/portability.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h include/simdjson/simdprune_tables.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h
HEADERS=$(PUBHEADERS) $(LIBHEADERS)
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/parsedjson.cpp src/parsedjsoniterator.cpp
MINIFIERHEADERS=include/simdjson/jsonminifier.h include/simdjson/simdprune_tables.h
MINIFIERLIBFILES=src/jsonminifier.cpp


@ -17,8 +17,28 @@ $SCRIPTPATH/src/simdjson.cpp
$SCRIPTPATH/src/jsonioutil.cpp
$SCRIPTPATH/src/jsonminifier.cpp
$SCRIPTPATH/src/jsonparser.cpp
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten_haswell.h
$SCRIPTPATH/src/simd_input.h
$SCRIPTPATH/src/arm64/architecture.h
$SCRIPTPATH/src/haswell/architecture.h
$SCRIPTPATH/src/westmere/architecture.h
$SCRIPTPATH/src/arm64/simd_input.h
$SCRIPTPATH/src/haswell/simd_input.h
$SCRIPTPATH/src/westmere/simd_input.h
$SCRIPTPATH/src/simdutf8check.h
$SCRIPTPATH/src/arm64/simdutf8check.h
$SCRIPTPATH/src/haswell/simdutf8check.h
$SCRIPTPATH/src/westmere/simdutf8check.h
$SCRIPTPATH/src/arm64/stage1_find_marks.h
$SCRIPTPATH/src/haswell/stage1_find_marks.h
$SCRIPTPATH/src/westmere/stage1_find_marks.h
$SCRIPTPATH/src/stage1_find_marks.cpp
$SCRIPTPATH/src/stringparsing.h
$SCRIPTPATH/src/arm64/stringparsing.h
$SCRIPTPATH/src/haswell/stringparsing.h
$SCRIPTPATH/src/westmere/stringparsing.h
$SCRIPTPATH/src/arm64/stage2_build_tape.h
$SCRIPTPATH/src/haswell/stage2_build_tape.h
$SCRIPTPATH/src/westmere/stage2_build_tape.h
$SCRIPTPATH/src/stage2_build_tape.cpp
$SCRIPTPATH/src/parsedjson.cpp
$SCRIPTPATH/src/parsedjsoniterator.cpp
@ -36,25 +56,10 @@ $SCRIPTPATH/include/simdjson/jsoncharutils.h
$SCRIPTPATH/include/simdjson/jsonformatutils.h
$SCRIPTPATH/include/simdjson/jsonioutil.h
$SCRIPTPATH/include/simdjson/simdprune_tables.h
$SCRIPTPATH/include/simdjson/simd_input.h
$SCRIPTPATH/include/simdjson/simd_input_haswell.h
$SCRIPTPATH/include/simdjson/simd_input_westmere.h
$SCRIPTPATH/include/simdjson/simd_input_arm64.h
$SCRIPTPATH/include/simdjson/simdutf8check.h
$SCRIPTPATH/include/simdjson/simdutf8check_haswell.h
$SCRIPTPATH/include/simdjson/simdutf8check_westmere.h
$SCRIPTPATH/include/simdjson/simdutf8check_arm64.h
$SCRIPTPATH/include/simdjson/jsonminifier.h
$SCRIPTPATH/include/simdjson/parsedjson.h
$SCRIPTPATH/include/simdjson/parsedjsoniterator.h
$SCRIPTPATH/include/simdjson/stage1_find_marks.h
$SCRIPTPATH/include/simdjson/stage1_find_marks_westmere.h
$SCRIPTPATH/include/simdjson/stage1_find_marks_haswell.h
$SCRIPTPATH/include/simdjson/stage1_find_marks_arm64.h
$SCRIPTPATH/include/simdjson/stringparsing.h
$SCRIPTPATH/include/simdjson/stringparsing_westmere.h
$SCRIPTPATH/include/simdjson/stringparsing_haswell.h
$SCRIPTPATH/include/simdjson/stringparsing_arm64.h
$SCRIPTPATH/include/simdjson/numberparsing.h
$SCRIPTPATH/include/simdjson/stage2_build_tape.h
$SCRIPTPATH/include/simdjson/jsonparser.h
@ -74,17 +79,27 @@ function dofile()
# echo "#line 8 \"$1\"" ## redefining the line/file is not nearly as useful as it sounds for debugging. It breaks IDEs.
while IFS= read -r line
do
if [[ "${line}" == '#include "simdjson'* ]]; then
# we paste the contents of simdjson header files with names ending by _common.h
# we ignore every other simdjson headers
if [[ "${line}" == '#include "simdjson/'*'_common.h"'* ]]; then
file=$(echo $line| cut -d'"' -f 2)
echo "$(<include/$file)" # we assume those files are always in include/
fi
else
# Otherwise we simply copy the line
echo "$line"
if [[ "${line}" == '#include "'*'"'* ]]; then
file=$(echo $line| cut -d'"' -f 2)
if [[ "${file}" == '../'* ]]; then
file=$(echo $file| cut -d'/' -f 2-)
fi;
# we ignore simdjson headers (except src/generic/*.h); they are handled in the above list
if [ -f include/$file ]; then
continue;
elif [ -f src/$file ]; then
# we paste the contents of src/generic/*.h
if [[ "${file}" == *'generic/'*'.h' ]]; then
echo "$(<src/$file)"
fi;
continue;
fi;
fi;
# Otherwise we simply copy the line
echo "$line"
done < "$1"
echo "/* end file $RELFILE */"
}


@ -88,8 +88,7 @@ int unified_machine_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj) {
}
// Responsible for selecting the best json_parse implementation
int find_structural_bits_dispatch(const uint8_t *buf, size_t len,
ParsedJson &pj) {
int find_structural_bits_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj) {
Architecture best_implementation = _find_best_supported_implementation();
// Selecting the best implementation
switch (best_implementation) {


@ -1,35 +1,20 @@
set(SIMDJSON_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include/simdjson)
set(SIMDJSON_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
set(SIMDJSON_INCLUDE
${SIMDJSON_INCLUDE_DIR}/common_defs.h
${SIMDJSON_INCLUDE_DIR}/isadetection.h
${SIMDJSON_INCLUDE_DIR}/jsoncharutils.h
${SIMDJSON_INCLUDE_DIR}/jsonformatutils.h
${SIMDJSON_INCLUDE_DIR}/jsonioutil.h
${SIMDJSON_INCLUDE_DIR}/jsonminifier.h
${SIMDJSON_INCLUDE_DIR}/jsonparser.h
${SIMDJSON_INCLUDE_DIR}/numberparsing.h
${SIMDJSON_INCLUDE_DIR}/padded_string.h
${SIMDJSON_INCLUDE_DIR}/parsedjson.h
${SIMDJSON_INCLUDE_DIR}/parsedjsoniterator.h
${SIMDJSON_INCLUDE_DIR}/portability.h
${SIMDJSON_INCLUDE_DIR}/simdjson.h
${SIMDJSON_INCLUDE_DIR}/simdjson_version.h
${SIMDJSON_INCLUDE_DIR}/simdprune_tables.h
${SIMDJSON_INCLUDE_DIR}/simdutf8check_arm64.h
${SIMDJSON_INCLUDE_DIR}/simdutf8check_haswell.h
${SIMDJSON_INCLUDE_DIR}/simdutf8check_westmere.h
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks.h
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_arm64.h
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_common.h
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten_common.h
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten_haswell.h
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_haswell.h
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_westmere.h
${SIMDJSON_INCLUDE_DIR}/stage2_build_tape.h
${SIMDJSON_INCLUDE_DIR}/stage2_build_tape_common.h
${SIMDJSON_INCLUDE_DIR}/stringparsing.h
${SIMDJSON_INCLUDE_DIR}/stringparsing_arm64.h
${SIMDJSON_INCLUDE_DIR}/stringparsing_common.h
${SIMDJSON_INCLUDE_DIR}/stringparsing_haswell.h
${SIMDJSON_INCLUDE_DIR}/stringparsing_westmere.h
${SIMDJSON_INCLUDE_DIR}/simdjson/common_defs.h
${SIMDJSON_INCLUDE_DIR}/simdjson/isadetection.h
${SIMDJSON_INCLUDE_DIR}/simdjson/jsoncharutils.h
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonformatutils.h
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonioutil.h
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonminifier.h
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonparser.h
${SIMDJSON_INCLUDE_DIR}/simdjson/numberparsing.h
${SIMDJSON_INCLUDE_DIR}/simdjson/padded_string.h
${SIMDJSON_INCLUDE_DIR}/simdjson/parsedjson.h
${SIMDJSON_INCLUDE_DIR}/simdjson/parsedjsoniterator.h
${SIMDJSON_INCLUDE_DIR}/simdjson/portability.h
${SIMDJSON_INCLUDE_DIR}/simdjson/simdjson.h
${SIMDJSON_INCLUDE_DIR}/simdjson/simdjson_version.h
${SIMDJSON_INCLUDE_DIR}/simdjson/simdprune_tables.h
${SIMDJSON_INCLUDE_DIR}/simdjson/stage1_find_marks.h
${SIMDJSON_INCLUDE_DIR}/simdjson/stage2_build_tape.h
)


@ -1,108 +1,19 @@
#ifndef SIMDJSON_STAGE1_FIND_MARKS_H
#define SIMDJSON_STAGE1_FIND_MARKS_H
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
#include "simdjson/portability.h"
#include "simdjson/simdjson.h"
#include "simdjson/simd_input.h"
#include <cassert>
namespace simdjson {
template <Architecture> uint64_t compute_quote_mask(uint64_t quote_bits);
namespace {
// for when clmul is unavailable
[[maybe_unused]] uint64_t portable_compute_quote_mask(uint64_t quote_bits) {
uint64_t quote_mask = quote_bits ^ (quote_bits << 1);
quote_mask = quote_mask ^ (quote_mask << 2);
quote_mask = quote_mask ^ (quote_mask << 4);
quote_mask = quote_mask ^ (quote_mask << 8);
quote_mask = quote_mask ^ (quote_mask << 16);
quote_mask = quote_mask ^ (quote_mask << 32);
return quote_mask;
}
} // namespace
template <Architecture T>
really_inline uint64_t find_odd_backslash_sequences(
simd_input<T> in, uint64_t &prev_iter_ends_odd_backslash);
template <Architecture T>
really_inline uint64_t find_quote_mask_and_bits(
simd_input<T> in, uint64_t odd_ends, uint64_t &prev_iter_inside_quote,
uint64_t &quote_bits, uint64_t &error_mask);
// do a 'shufti' to detect structural JSON characters
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
// these go into the first 3 buckets of the comparison (1/2/4)
// we are also interested in the four whitespace characters
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
// these go into the next 2 buckets of the comparison (8/16)
template <Architecture T>
void find_whitespace_and_structurals(simd_input<T> in, uint64_t &whitespace,
uint64_t &structurals);
// return an updated structural bit vector with quoted contents cleared out and
// pseudo-structural characters added to the mask
// updates prev_iter_ends_pseudo_pred which tells us whether the previous
// iteration ended on a whitespace or a structural character (which means that
// the next iteration
// will have a pseudo-structural character at its start)
really_inline uint64_t finalize_structurals(
uint64_t structurals, uint64_t whitespace, uint64_t quote_mask,
uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) {
// mask off anything inside quotes
structurals &= ~quote_mask;
// add the real quote bits back into our bit_mask as well, so we can
// quickly traverse the strings we've spent all this trouble gathering
structurals |= quote_bits;
// Now, establish "pseudo-structural characters". These are non-whitespace
// characters that are (a) outside quotes and (b) have a predecessor that's
// either whitespace or a structural character. This means that subsequent
// passes will get a chance to encounter the first character of every string
// of non-whitespace and, if we're parsing an atom like true/false/null or a
// number we can stop at the first whitespace or structural character
// following it.
// a qualified predecessor is something that can happen 1 position before a
// pseudo-structural character
uint64_t pseudo_pred = structurals | whitespace;
uint64_t shifted_pseudo_pred =
(pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
uint64_t pseudo_structurals =
shifted_pseudo_pred & (~whitespace) & (~quote_mask);
structurals |= pseudo_structurals;
// now, we've used our close quotes all we need to. So let's switch them off
// they will be off in the quote mask and on in quote bits.
structurals &= ~(quote_bits & ~quote_mask);
return structurals;
}
template <Architecture T = Architecture::NATIVE>
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj);
template <Architecture T = Architecture::NATIVE>
int find_structural_bits(const uint8_t *buf, size_t len,
simdjson::ParsedJson &pj);
template <Architecture T = Architecture::NATIVE>
int find_structural_bits(const char *buf, size_t len,
simdjson::ParsedJson &pj) {
int find_structural_bits(const char *buf, size_t len, simdjson::ParsedJson &pj) {
return find_structural_bits((const uint8_t *)buf, len, pj);
}
// flatten out values in 'bits' assuming that they are to have values of idx
// plus their position in the bitvector, and store these indexes at
// base_ptr[base] incrementing base as we go
// will potentially store extra values beyond end of valid bits, so base_ptr
// needs to be large enough to handle this
template <Architecture T = Architecture::NATIVE>
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
uint32_t idx, uint64_t bits);
} // namespace simdjson
}; // namespace simdjson
#endif
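
portable_compute_quote_mask() above is a prefix XOR: after the doubling shifts, each output bit is the XOR of all quote bits at or below it, so the mask is set from an opening quote up to, but not including, its matching closing quote. A small self-contained check of that property on an assumed example input:

#include <cstdint>
#include <cstdio>

// Same doubling-shift prefix XOR as the header above (logic copied verbatim).
static uint64_t prefix_xor_quote_mask(uint64_t quote_bits) {
  uint64_t quote_mask = quote_bits ^ (quote_bits << 1);
  quote_mask = quote_mask ^ (quote_mask << 2);
  quote_mask = quote_mask ^ (quote_mask << 4);
  quote_mask = quote_mask ^ (quote_mask << 8);
  quote_mask = quote_mask ^ (quote_mask << 16);
  quote_mask = quote_mask ^ (quote_mask << 32);
  return quote_mask;
}

int main() {
  // input ab"cd"ef : quotes sit at bit positions 2 and 5
  uint64_t quote_bits = (1ULL << 2) | (1ULL << 5);
  uint64_t mask = prefix_xor_quote_mask(quote_bits);
  std::printf("%#llx\n", (unsigned long long)mask); // 0x1c: bits 2,3,4 are "inside quotes"
  return 0;
}

On x86 the Haswell version later in this commit obtains the same prefix XOR in a single carry-less multiply (_mm_clmulepi64_si128 against an all-ones vector), and the ARM64 version uses vmull_p64 when the crypto extension is available.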


@ -1,90 +0,0 @@
// This file provides the same function as
// stage1_find_marks_flatten_common.h, but uses Intel intrinsics.
// This should provide better performance on Visual Studio
// and other compilers that optimize conservatively.
// Specifically, on x64 processors with BMI,
// x & (x - 1) should be mapped to
// the blsr instruction. By using the
// _blsr_u64 intrinsic, we
// ensure that this will happen.
/////////
#include "simdjson/common_defs.h"
#include "simdjson/portability.h"
#ifdef IS_X86_64
TARGET_HASWELL
namespace simdjson {
// flatten out values in 'bits' assuming that they are to have values of idx
// plus their position in the bitvector, and store these indexes at
// base_ptr[base] incrementing base as we go
// will potentially store extra values beyond end of valid bits, so base_ptr
// needs to be large enough to handle this
template<>
really_inline void flatten_bits<Architecture::HASWELL>(uint32_t *base_ptr, uint32_t &base,
uint32_t idx, uint64_t bits) {
// In some instances, the next branch is expensive because it is mispredicted.
// Unfortunately, in other cases,
// it helps tremendously.
if (bits == 0)
return;
uint32_t cnt = _mm_popcnt_u64(bits);
uint32_t next_base = base + cnt;
idx -= 64;
base_ptr += base;
{
base_ptr[0] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[1] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[2] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[3] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[4] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[5] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[6] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[7] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr += 8;
}
// We hope that the next branch is easily predicted.
if (cnt > 8) {
base_ptr[0] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[1] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[2] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[3] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[4] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[5] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[6] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[7] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr += 8;
}
if (cnt > 16) { // unlucky: we rarely get here
// since it means having one structural or pseudo-structural element
// every 4 characters (possible with inputs like "","","",...).
do {
base_ptr[0] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr++;
} while (bits != 0);
}
base = next_base;
}
} // namespace simdjson
UNTARGET_REGION
#endif // IS_X86_64
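
The comment at the top of this flatten implementation is the key to the loop body: each step records the position of the lowest set bit and then clears it, and on BMI-capable x64 the clear step, x & (x - 1), is the blsr instruction that _blsr_u64 guarantees. A portable sketch of the same extraction, using the plain loop rather than the 8-wide unrolled version, and assuming GCC/Clang's __builtin_ctzll for trailing_zeroes:

#include <cstdint>
#include <cstdio>

static int trailing_zeroes(uint64_t v) { return __builtin_ctzll(v); } // GCC/Clang builtin

// Record the index of every set bit in `bits`, offset by idx - 64 (idx points
// one block past the 64 bytes that produced `bits`), clearing the lowest set
// bit each round.
static void flatten_bits_simple(uint32_t *base_ptr, uint32_t &base,
                                uint32_t idx, uint64_t bits) {
  uint32_t *out_ptr = base_ptr + base;
  idx -= 64;
  while (bits != 0) {
    *out_ptr++ = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1); // the operation _blsr_u64 maps to
  }
  base = static_cast<uint32_t>(out_ptr - base_ptr);
}

int main() {
  uint32_t indexes[64];
  uint32_t base = 0;
  flatten_bits_simple(indexes, base, 64, (1ULL << 0) | (1ULL << 3) | (1ULL << 5));
  for (uint32_t i = 0; i < base; i++) std::printf("%u ", indexes[i]); // 0 3 5
  std::printf("\n");
  return 0;
}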


@ -1,116 +0,0 @@
#ifndef SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H
#define SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H
#include "simdjson/simd_input_haswell.h"
#include "simdjson/simdutf8check_haswell.h"
#include "simdjson/stage1_find_marks.h"
#ifdef IS_X86_64
TARGET_HASWELL
namespace simdjson {
template <>
really_inline uint64_t
compute_quote_mask<Architecture::HASWELL>(uint64_t quote_bits) {
// There should be no such thing as a processor supporting avx2
// but not clmul.
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0));
return quote_mask;
}
template <>
really_inline void find_whitespace_and_structurals<Architecture::HASWELL>(
simd_input<Architecture::HASWELL> in, uint64_t &whitespace,
uint64_t &structurals) {
#ifdef SIMDJSON_NAIVE_STRUCTURAL
// You should never need this naive approach, but it can be useful
// for research purposes
const __m256i mask_open_brace = _mm256_set1_epi8(0x7b);
__m256i struct_lo = _mm256_cmpeq_epi8(in.lo, mask_open_brace);
__m256i struct_hi = _mm256_cmpeq_epi8(in.hi, mask_open_brace);
const __m256i mask_close_brace = _mm256_set1_epi8(0x7d);
struct_lo =
_mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_close_brace));
struct_hi =
_mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_close_brace));
const __m256i mask_open_bracket = _mm256_set1_epi8(0x5b);
struct_lo =
_mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_open_bracket));
struct_hi =
_mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_open_bracket));
const __m256i mask_close_bracket = _mm256_set1_epi8(0x5d);
struct_lo =
_mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_close_bracket));
struct_hi =
_mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_close_bracket));
const __m256i mask_column = _mm256_set1_epi8(0x3a);
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_column));
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_column));
const __m256i mask_comma = _mm256_set1_epi8(0x2c);
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_comma));
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_comma));
uint64_t structural_res_0 =
static_cast<uint32_t>(_mm256_movemask_epi8(struct_lo));
uint64_t structural_res_1 = _mm256_movemask_epi8(struct_hi);
structurals = (structural_res_0 | (structural_res_1 << 32));
const __m256i mask_space = _mm256_set1_epi8(0x20);
__m256i space_lo = _mm256_cmpeq_epi8(in.lo, mask_space);
__m256i space_hi = _mm256_cmpeq_epi8(in.hi, mask_space);
const __m256i mask_linefeed = _mm256_set1_epi8(0x0a);
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_linefeed));
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_linefeed));
const __m256i mask_tab = _mm256_set1_epi8(0x09);
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_tab));
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_tab));
const __m256i mask_carriage = _mm256_set1_epi8(0x0d);
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_carriage));
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_carriage));
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(space_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(space_hi);
whitespace = (ws_res_0 | (ws_res_1 << 32));
// end of naive approach
#else // SIMDJSON_NAIVE_STRUCTURAL
// clang-format off
const __m256i structural_table =
_mm256_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123,
44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
const __m256i white_table = _mm256_setr_epi8(
32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100,
32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100);
// clang-format on
const __m256i struct_offset = _mm256_set1_epi8(0xd4u);
const __m256i struct_mask = _mm256_set1_epi8(32);
__m256i lo_white =
_mm256_cmpeq_epi8(in.lo, _mm256_shuffle_epi8(white_table, in.lo));
__m256i hi_white =
_mm256_cmpeq_epi8(in.hi, _mm256_shuffle_epi8(white_table, in.hi));
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(lo_white));
uint64_t ws_res_1 = _mm256_movemask_epi8(hi_white);
whitespace = (ws_res_0 | (ws_res_1 << 32));
__m256i lo_struct_r1 = _mm256_add_epi8(struct_offset, in.lo);
__m256i hi_struct_r1 = _mm256_add_epi8(struct_offset, in.hi);
__m256i lo_struct_r2 = _mm256_or_si256(in.lo, struct_mask);
__m256i hi_struct_r2 = _mm256_or_si256(in.hi, struct_mask);
__m256i lo_struct_r3 = _mm256_shuffle_epi8(structural_table, lo_struct_r1);
__m256i hi_struct_r3 = _mm256_shuffle_epi8(structural_table, hi_struct_r1);
__m256i lo_struct = _mm256_cmpeq_epi8(lo_struct_r2, lo_struct_r3);
__m256i hi_struct = _mm256_cmpeq_epi8(hi_struct_r2, hi_struct_r3);
uint64_t structural_res_0 =
static_cast<uint32_t>(_mm256_movemask_epi8(lo_struct));
uint64_t structural_res_1 = _mm256_movemask_epi8(hi_struct);
structurals = (structural_res_0 | (structural_res_1 << 32));
#endif // SIMDJSON_NAIVE_STRUCTURAL
}
} // namespace simdjson
UNTARGET_REGION
#endif // IS_X86_64
#endif // SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H


@ -10,7 +10,6 @@
#include "simdjson/numberparsing.h"
#include "simdjson/parsedjson.h"
#include "simdjson/simdjson.h"
#include "simdjson/stringparsing.h"
namespace simdjson {
void init_state_machine();


@ -1,4 +1,4 @@
/* auto-generated on Wed Aug 14 13:56:54 DST 2019. Do not edit! */
/* auto-generated on Sun Aug 18 15:06:50 DST 2019. Do not edit! */
#include <iostream>
#include "simdjson.h"

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -18,23 +18,55 @@ MESSAGE( STATUS "SIMDJSON_LIB_TYPE: " ${SIMDJSON_LIB_TYPE})
# Bring in include files
include(../include/CMakeLists.txt)
set(SIMDJSON_SRC
jsonioutil.cpp
jsonminifier.cpp
jsonparser.cpp
stage1_find_marks.cpp
stage2_build_tape.cpp
parsedjson.cpp
parsedjsoniterator.cpp
simdjson.cpp
${SIMDJSON_INCLUDE}
)
add_library(${SIMDJSON_LIB_NAME} ${SIMDJSON_LIB_TYPE} ${SIMDJSON_SRC})
set(SIMDJSON_SRC_DIR $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src>)
set(SIMDJSON_SRC
jsonioutil.cpp
jsonminifier.cpp
jsonparser.cpp
stage1_find_marks.cpp
stage2_build_tape.cpp
parsedjson.cpp
parsedjsoniterator.cpp
simdjson.cpp
)
# Load headers and sources
set(SIMDJSON_SRC_HEADERS
arm64/architecture.h
arm64/simd_input.h
arm64/simdutf8check.h
arm64/stage1_find_marks.h
arm64/stage2_build_tape.h
arm64/stringparsing.h
generic/stage1_find_marks_flatten.h
generic/stage1_find_marks.h
generic/stage2_build_tape.h
generic/stringparsing.h
haswell/architecture.h
haswell/simd_input.h
haswell/simdutf8check.h
haswell/stage1_find_marks.h
haswell/stage2_build_tape.h
haswell/stringparsing.h
westmere/architecture.h
westmere/simd_input.h
westmere/simdutf8check.h
westmere/stage1_find_marks.h
westmere/stage2_build_tape.h
westmere/stringparsing.h
simd_input.h
simdutf8check.h
stringparsing.h
)
add_library(${SIMDJSON_LIB_NAME} ${SIMDJSON_LIB_TYPE} ${SIMDJSON_SRC} ${SIMDJSON_INCLUDE} ${SIMDJSON_SRC_HEADERS})
target_include_directories(${SIMDJSON_LIB_NAME}
PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${SIMDJSON_SRC_DIR}>
$<BUILD_INTERFACE:${SIMDJSON_INCLUDE_DIR}>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
)

src/arm64/architecture.h

@ -0,0 +1,18 @@
#ifndef SIMDJSON_ARM64_ARCHITECTURE_H
#define SIMDJSON_ARM64_ARCHITECTURE_H
#include "simdjson/portability.h"
#ifdef IS_ARM64
#include "simdjson/simdjson.h"
namespace simdjson::arm64 {
static const Architecture ARCHITECTURE = Architecture::ARM64;
} // namespace simdjson::arm64
#endif // IS_ARM64
#endif // SIMDJSON_ARM64_ARCHITECTURE_H


@ -1,9 +1,10 @@
#ifndef SIMDJSON_SIMD_INPUT_ARM64_H
#define SIMDJSON_SIMD_INPUT_ARM64_H
#ifndef SIMDJSON_ARM64_SIMD_INPUT_H
#define SIMDJSON_ARM64_SIMD_INPUT_H
#include "simdjson/simd_input.h"
#include "../simd_input.h"
#ifdef IS_ARM64
namespace simdjson {
really_inline uint16_t neon_movemask(uint8x16_t input) {
@ -68,4 +69,4 @@ struct simd_input<Architecture::ARM64> {
} // namespace simdjson
#endif // IS_ARM64
#endif // SIMDJSON_SIMD_INPUT_ARM64_H
#endif // SIMDJSON_ARM64_SIMD_INPUT_H


@ -1,13 +1,13 @@
// From https://github.com/cyb70289/utf8/blob/master/lemire-neon.c
// Adapted from https://github.com/lemire/fastvalidate-utf-8
#ifndef SIMDJSON_SIMDUTF8CHECK_ARM64_H
#define SIMDJSON_SIMDUTF8CHECK_ARM64_H
#ifndef SIMDJSON_ARM64_SIMDUTF8CHECK_H
#define SIMDJSON_ARM64_SIMDUTF8CHECK_H
#if defined(_ARM_NEON) || defined(__aarch64__) || \
(defined(_MSC_VER) && defined(_M_ARM64))
#include "simdjson/simdutf8check.h"
#include "../simdutf8check.h"
#include <arm_neon.h>
#include <cinttypes>
#include <cstddef>
@ -31,7 +31,7 @@
* U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
*
*/
namespace simdjson {
namespace simdjson::arm64 {
// all byte values must be no larger than 0xF4
static inline void check_smaller_than_0xF4(int8x16_t current_bytes,
@ -191,6 +191,12 @@ really_inline bool check_ascii_neon(simd_input<Architecture::ARM64> in) {
return vget_lane_u64(result, 0) == 0;
}
} // namespace simdjson::arm64
namespace simdjson {
using namespace simdjson::arm64;
template <>
struct utf8_checker<Architecture::ARM64> {
int8x16_t has_error{};


@ -1,16 +1,19 @@
#ifndef SIMDJSON_STAGE1_FIND_MARKS_ARM64_H
#define SIMDJSON_STAGE1_FIND_MARKS_ARM64_H
#ifndef SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
#define SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
#include "simdjson/simd_input_arm64.h"
#include "simdjson/simdutf8check_arm64.h"
#include "simdjson/stage1_find_marks.h"
#include "simdjson/portability.h"
#ifdef IS_ARM64
namespace simdjson {
template <>
really_inline uint64_t
compute_quote_mask<Architecture::ARM64>(uint64_t quote_bits) {
#include "arm64/architecture.h"
#include "arm64/simd_input.h"
#include "arm64/simdutf8check.h"
#include "simdjson/stage1_find_marks.h"
namespace simdjson::arm64 {
static really_inline uint64_t compute_quote_mask(uint64_t quote_bits) {
#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension
return vmull_p64(-1ULL, quote_bits);
#else
@ -18,9 +21,8 @@ compute_quote_mask<Architecture::ARM64>(uint64_t quote_bits) {
#endif
}
template <>
really_inline void find_whitespace_and_structurals<Architecture::ARM64>(
simd_input<Architecture::ARM64> in, uint64_t &whitespace,
static really_inline void find_whitespace_and_structurals(
simd_input<ARCHITECTURE> in, uint64_t &whitespace,
uint64_t &structurals) {
const uint8x16_t low_nibble_mask =
(uint8x16_t){16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0};
@ -66,7 +68,20 @@ really_inline void find_whitespace_and_structurals<Architecture::ARM64>(
uint8x16_t tmp_ws_3 = vtstq_u8(v_3, whitespace_shufti_mask);
whitespace = neon_movemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3);
}
#include "generic/stage1_find_marks_flatten.h"
#include "generic/stage1_find_marks.h"
} // namespace simdjson::arm64
namespace simdjson {
template <>
int find_structural_bits<Architecture::ARM64>(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
return arm64::find_structural_bits(buf, len, pj);
}
} // namespace simdjson
#endif // IS_ARM64
#endif // SIMDJSON_STAGE1_FIND_MARKS_ARM64_H
#endif // SIMDJSON_ARM64_STAGE1_FIND_MARKS_H


@ -0,0 +1,30 @@
#ifndef SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
#define SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
#include "simdjson/portability.h"
#ifdef IS_ARM64
#include "simdjson/stage2_build_tape.h"
#include "arm64/architecture.h"
#include "arm64/stringparsing.h"
namespace simdjson::arm64 {
#include "generic/stage2_build_tape.h"
} // namespace simdjson::arm64
namespace simdjson {
template <>
WARN_UNUSED int
unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len, ParsedJson &pj) {
return arm64::unified_machine(buf, len, pj);
}
} // namespace simdjson
#endif // IS_ARM64
#endif // SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H


@ -1,14 +1,15 @@
#ifndef SIMDJSON_STRINGPARSING_ARM64_H
#define SIMDJSON_STRINGPARSING_ARM64_H
#ifndef SIMDJSON_ARM64_STRINGPARSING_H
#define SIMDJSON_ARM64_STRINGPARSING_H
#include "simdjson/stringparsing.h"
#include "../stringparsing.h"
#ifdef IS_ARM64
namespace simdjson {
template <>
really_inline parse_string_helper
find_bs_bits_and_quote_bits<Architecture::ARM64>(const uint8_t *src,
uint8_t *dst) {
#include "arm64/architecture.h"
namespace simdjson::arm64 {
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
// this can read up to 31 bytes beyond the buffer size, but we require
// SIMDJSON_PADDING of padding
static_assert(2 * sizeof(uint8x16_t) - 1 <= SIMDJSON_PADDING);
@ -39,15 +40,13 @@ find_bs_bits_and_quote_bits<Architecture::ARM64>(const uint8_t *src,
vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 0), // bs_bits
vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 1) // quote_bits
};
}
} // namespace simdjson
#include "generic/stringparsing.h"
#define TARGETED_ARCHITECTURE Architecture::ARM64
#define TARGETED_REGION TARGET_ARM64
#include "simdjson/stringparsing_common.h"
#undef TARGETED_ARCHITECTURE
#undef TARGETED_REGION
}
// namespace simdjson::arm64
#endif // IS_ARM64
#endif


@ -1,14 +1,8 @@
// This file contains the common code every implementation uses in stage1
// It is intended to be included multiple times and compiled multiple times
// We assume the file in which it is include already includes
// We assume the file in which it is included already includes
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
#ifdef TARGETED_ARCHITECTURE
#ifdef TARGETED_REGION
TARGETED_REGION
namespace simdjson {
// return a bitvector indicating where we have characters that end an odd-length
// sequence of backslashes (and thus change the behavior of the next character
// to follow). An even-length sequence of backslashes, and, for that matter, the
@ -18,9 +12,8 @@ namespace simdjson {
// indicate whether we end an iteration on an odd-length sequence of
// backslashes, which modifies our subsequent search for odd-length
// sequences of backslashes in an obvious way.
template <>
really_inline uint64_t find_odd_backslash_sequences<TARGETED_ARCHITECTURE>(
simd_input<TARGETED_ARCHITECTURE> in,
really_inline uint64_t find_odd_backslash_sequences(
simd_input<ARCHITECTURE> in,
uint64_t &prev_iter_ends_odd_backslash) {
const uint64_t even_bits = 0x5555555555555555ULL;
const uint64_t odd_bits = ~even_bits;
@ -66,14 +59,13 @@ really_inline uint64_t find_odd_backslash_sequences<TARGETED_ARCHITECTURE>(
// Note that we don't do any error checking to see if we have backslash
// sequences outside quotes; these
// backslash sequences (of any length) will be detected elsewhere.
template <>
really_inline uint64_t find_quote_mask_and_bits<TARGETED_ARCHITECTURE>(
simd_input<TARGETED_ARCHITECTURE> in, uint64_t odd_ends,
really_inline uint64_t find_quote_mask_and_bits(
simd_input<ARCHITECTURE> in, uint64_t odd_ends,
uint64_t &prev_iter_inside_quote, uint64_t &quote_bits,
uint64_t &error_mask) {
quote_bits = in.eq('"');
quote_bits = quote_bits & ~odd_ends;
uint64_t quote_mask = compute_quote_mask<TARGETED_ARCHITECTURE>(quote_bits);
uint64_t quote_mask = compute_quote_mask(quote_bits);
quote_mask ^= prev_iter_inside_quote;
/* All Unicode characters may be placed within the
* quotation marks, except for the characters that MUST be escaped:
@ -90,33 +82,65 @@ really_inline uint64_t find_quote_mask_and_bits<TARGETED_ARCHITECTURE>(
return quote_mask;
}
really_inline uint64_t finalize_structurals(
uint64_t structurals, uint64_t whitespace, uint64_t quote_mask,
uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) {
// mask off anything inside quotes
structurals &= ~quote_mask;
// add the real quote bits back into our bit_mask as well, so we can
// quickly traverse the strings we've spent all this trouble gathering
structurals |= quote_bits;
// Now, establish "pseudo-structural characters". These are non-whitespace
// characters that are (a) outside quotes and (b) have a predecessor that's
// either whitespace or a structural character. This means that subsequent
// passes will get a chance to encounter the first character of every string
// of non-whitespace and, if we're parsing an atom like true/false/null or a
// number we can stop at the first whitespace or structural character
// following it.
// a qualified predecessor is something that can happen 1 position before a
// pseudo-structural character
uint64_t pseudo_pred = structurals | whitespace;
uint64_t shifted_pseudo_pred =
(pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
uint64_t pseudo_structurals =
shifted_pseudo_pred & (~whitespace) & (~quote_mask);
structurals |= pseudo_structurals;
// now, we've used our close quotes all we need to. So let's switch them off
// they will be off in the quote mask and on in quote bits.
structurals &= ~(quote_bits & ~quote_mask);
return structurals;
}
// Find structural bits in a 64-byte chunk.
really_inline void find_structural_bits_64(
const uint8_t *buf, size_t idx, uint32_t *base_ptr, uint32_t &base,
uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote,
uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals,
uint64_t &error_mask,
utf8_checker<TARGETED_ARCHITECTURE> &utf8_state) {
simd_input<TARGETED_ARCHITECTURE> in(buf);
utf8_checker<ARCHITECTURE> &utf8_state) {
simd_input<ARCHITECTURE> in(buf);
utf8_state.check_next_input(in);
/* detect odd sequences of backslashes */
uint64_t odd_ends = find_odd_backslash_sequences<TARGETED_ARCHITECTURE>(
uint64_t odd_ends = find_odd_backslash_sequences(
in, prev_iter_ends_odd_backslash);
/* detect insides of quote pairs ("quote_mask") and also our quote_bits
* themselves */
uint64_t quote_bits;
uint64_t quote_mask = find_quote_mask_and_bits<TARGETED_ARCHITECTURE>(
uint64_t quote_mask = find_quote_mask_and_bits(
in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);
/* take the previous iteration's structural bits, not our current
* iteration,
* and flatten */
flatten_bits<TARGETED_ARCHITECTURE>(base_ptr, base, idx, structurals);
flatten_bits(base_ptr, base, idx, structurals);
uint64_t whitespace;
find_whitespace_and_structurals<TARGETED_ARCHITECTURE>(in, whitespace,
structurals);
find_whitespace_and_structurals(in, whitespace, structurals);
/* fixup structurals to reflect quotes and add pseudo-structural
* characters */
@ -124,9 +148,7 @@ really_inline void find_structural_bits_64(
quote_bits, prev_iter_ends_pseudo_pred);
}
template <>
int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
ParsedJson &pj) {
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
if (len > pj.byte_capacity) {
std::cerr << "Your ParsedJson object only supports documents up to "
<< pj.byte_capacity << " bytes but you are trying to process "
@ -135,7 +157,7 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
}
uint32_t *base_ptr = pj.structural_indexes;
uint32_t base = 0;
utf8_checker<TARGETED_ARCHITECTURE> utf8_state;
utf8_checker<ARCHITECTURE> utf8_state;
/* we have padded the input out to 64 byte multiple with the remainder
* being zeros persistent state across loop does the last iteration end
@ -194,7 +216,7 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
/* finally, flatten out the remaining structurals from the last iteration
*/
flatten_bits<TARGETED_ARCHITECTURE>(base_ptr, base, idx, structurals);
flatten_bits(base_ptr, base, idx, structurals);
pj.n_structural_indexes = base;
/* a valid JSON file cannot have zero structural indexes - we should have
@ -217,13 +239,3 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
}
return utf8_state.errors();
}
} // namespace simdjson
UNTARGET_REGION
#else
#error TARGETED_REGION must be specified before including.
#endif // TARGETED_REGION
#else
#error TARGETED_ARCHITECTURE must be specified before including.
#endif // TARGETED_ARCHITECTURE
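
The comment block above spells out the contract of find_odd_backslash_sequences(): a character is escaped exactly when it is preceded by an odd-length run of backslashes, and the returned bitvector marks the positions right after such runs so find_quote_mask_and_bits() can strip them from quote_bits (quote_bits & ~odd_ends). A scalar illustration of that contract, using an assumed example document rather than the SIMD code above:

#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Set a bit at the position immediately after every odd-length backslash run,
// i.e. at the character whose meaning the run changes (the escaped character).
// Simplified sketch: no cross-block carry handling.
static uint64_t odd_backslash_ends(const char *p, size_t len) {
  uint64_t out = 0;
  size_t i = 0;
  while (i < len) {
    if (p[i] != '\\') { i++; continue; }
    size_t start = i;
    while (i < len && p[i] == '\\') i++;
    if ((i - start) % 2 == 1 && i < 64) out |= 1ULL << i;
  }
  return out;
}

int main() {
  const char json[] = " \"a\\\"b\" ";   // the document  "a\"b"  with padding spaces
  uint64_t ends = odd_backslash_ends(json, std::strlen(json));
  std::printf("%#" PRIx64 "\n", ends);  // 0x10: bit 4, the escaped quote at index 4
  return 0;
}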


@ -3,20 +3,12 @@
// We assume the file in which it is included already includes
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
#ifdef TARGETED_ARCHITECTURE
#ifdef TARGETED_REGION
TARGETED_REGION
namespace simdjson {
#ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking
//
// This is just a naive implementation. It should normally be
// disabled, but can be used for research purposes to compare
// against our optimized version.
template <>
really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint32_t &base,
uint32_t idx, uint64_t bits) {
static really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
uint32_t *out_ptr = base_ptr + base;
idx -= 64;
while (bits != 0) {
@ -27,15 +19,14 @@ really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint3
base = (out_ptr - base_ptr);
}
#else
#else // SIMDJSON_NAIVE_FLATTEN
// flatten out values in 'bits' assuming that they are to have values of idx
// plus their position in the bitvector, and store these indexes at
// base_ptr[base] incrementing base as we go
// will potentially store extra values beyond end of valid bits, so base_ptr
// needs to be large enough to handle this
template<>
really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint32_t &base,
uint32_t idx, uint64_t bits) {
static really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
// In some instances, the next branch is expensive because it is mispredicted.
// Unfortunately, in other cases,
// it helps tremendously.
@ -96,13 +87,3 @@ really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint3
base = next_base;
}
#endif // SIMDJSON_NAIVE_FLATTEN
} // namespace simdjson
UNTARGET_REGION
#else
#error TARGETED_REGION must be specified before including.
#endif // TARGETED_REGION
#else
#error TARGETED_ARCHITECTURE must be specified before including.
#endif // TARGETED_ARCHITECTURE


@ -3,12 +3,6 @@
// We assume the file in which it is included already includes
// "simdjson/stage2_build_tape.h" (this simplifies amalgamation)
#ifdef TARGETED_ARCHITECTURE
#ifdef TARGETED_REGION
TARGETED_REGION
namespace simdjson {
// this macro reads the next structural character, updating idx, i and c.
#define UPDATE_CHAR() \
{ \
@ -41,10 +35,8 @@ namespace simdjson {
* The JSON is parsed to a tape, see the accompanying tape.md file
* for documentation.
***********/
template <>
WARN_UNUSED int
unified_machine<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
ParsedJson &pj) {
unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
uint32_t i = 0; /* index of the structural character (0,1,2,3...) */
uint32_t idx; /* location of the structural character in the input (buf) */
uint8_t c; /* used to track the (structural) character we are looking at,
@ -100,7 +92,7 @@ unified_machine<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
* https://tools.ietf.org/html/rfc8259
* #ifdef SIMDJSON_ALLOWANYTHINGINROOT */
case '"': {
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
if (!parse_string(buf, len, pj, depth, idx)) {
goto fail;
}
break;
@ -229,7 +221,7 @@ object_begin:
UPDATE_CHAR();
switch (c) {
case '"': {
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
if (!parse_string(buf, len, pj, depth, idx)) {
goto fail;
}
goto object_key_state;
@ -248,7 +240,7 @@ object_key_state:
UPDATE_CHAR();
switch (c) {
case '"': {
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
if (!parse_string(buf, len, pj, depth, idx)) {
goto fail;
}
break;
@ -333,7 +325,7 @@ object_continue:
if (c != '"') {
goto fail;
} else {
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
if (!parse_string(buf, len, pj, depth, idx)) {
goto fail;
}
goto object_key_state;
@ -367,7 +359,7 @@ main_array_switch:
* on paths that can accept a close square brace (post-, and at start) */
switch (c) {
case '"': {
if (!parse_string<TARGETED_ARCHITECTURE>(buf, len, pj, depth, idx)) {
if (!parse_string(buf, len, pj, depth, idx)) {
goto fail;
}
break;
@ -523,13 +515,3 @@ fail:
pj.error_code = simdjson::TAPE_ERROR;
return pj.error_code;
}
} // namespace simdjson
UNTARGET_REGION
#else
#error TARGETED_REGION must be specified before including.
#endif // TARGETED_REGION
#else
#error TARGETED_ARCHITECTURE must be specified before including.
#endif // TARGETED_ARCHITECTURE


@ -1,28 +1,19 @@
// This file contains the common code every implementation uses
// It is intended to be included multiple times and compiled multiple times
// We assume the file in which it is included already includes
// "simdjson/stringparsing.h" (this simplifies amalgamation)
// "stringparsing.h" (this simplifies amalgamation)
#ifdef TARGETED_ARCHITECTURE
#ifdef TARGETED_REGION
TARGETED_REGION
namespace simdjson {
template <>
WARN_UNUSED
really_inline bool
parse_string<TARGETED_ARCHITECTURE>(UNUSED const uint8_t *buf,
UNUSED size_t len, ParsedJson &pj,
UNUSED const uint32_t depth,
UNUSED uint32_t offset) {
WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf,
UNUSED size_t len, ParsedJson &pj,
UNUSED const uint32_t depth,
UNUSED uint32_t offset) {
pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"');
const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */
uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t);
const uint8_t *const start_of_string = dst;
while (1) {
parse_string_helper helper =
find_bs_bits_and_quote_bits<TARGETED_ARCHITECTURE>(src, dst);
find_bs_bits_and_quote_bits(src, dst);
if (((helper.bs_bits - 1) & helper.quote_bits) != 0) {
/* we encountered quotes first. Move dst to point to quotes and exit
*/
@ -80,7 +71,7 @@ WARN_UNUSED
} else {
/* they are the same. Since they can't co-occur, it means we
* encountered neither. */
if constexpr (TARGETED_ARCHITECTURE == Architecture::WESTMERE) {
if constexpr (ARCHITECTURE == Architecture::WESTMERE) {
src += 16;
dst += 16;
} else {
@ -92,13 +83,3 @@ WARN_UNUSED
/* can't be reached */
return true;
}
} // namespace simdjson
UNTARGET_REGION
#else
#error TARGETED_REGION must be specified before including.
#endif // TARGETED_REGION
#else
#error TARGETED_ARCHITECTURE must be specified before including.
#endif // TARGETED_ARCHITECTURE
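
The loop above picks between the quote path and the backslash path with ((helper.bs_bits - 1) & helper.quote_bits): subtracting 1 from bs_bits sets every bit below its lowest set bit, and since a byte cannot be both a quote and a backslash, the AND is non-zero exactly when a quote appears before the first backslash in the chunk. A tiny check with assumed example masks:

#include <cstdint>
#include <cstdio>

static bool quote_comes_first(uint32_t bs_bits, uint32_t quote_bits) {
  // bs_bits == 0 wraps to all-ones, so any quote wins when there is no backslash.
  return ((bs_bits - 1) & quote_bits) != 0;
}

int main() {
  std::printf("%d\n", quote_comes_first(1u << 6, 1u << 3)); // 1: quote (bit 3) before backslash (bit 6)
  std::printf("%d\n", quote_comes_first(1u << 2, 1u << 3)); // 0: backslash (bit 2) before quote (bit 3)
  std::printf("%d\n", quote_comes_first(0, 1u << 9));       // 1: no backslash at all
  return 0;
}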


@ -0,0 +1,20 @@
#ifndef SIMDJSON_HASWELL_ARCHITECTURE_H
#define SIMDJSON_HASWELL_ARCHITECTURE_H
#include "simdjson/portability.h"
#ifdef IS_X86_64
#include "simdjson/simdjson.h"
namespace simdjson::haswell {
static const Architecture ARCHITECTURE = Architecture::HASWELL;
} // namespace simdjson::haswell
#endif // IS_X86_64
#endif // SIMDJSON_HASWELL_ARCHITECTURE_H


@ -1,7 +1,7 @@
#ifndef SIMDJSON_SIMD_INPUT_HASWELL_H
#define SIMDJSON_SIMD_INPUT_HASWELL_H
#ifndef SIMDJSON_HASWELL_SIMD_INPUT_H
#define SIMDJSON_HASWELL_SIMD_INPUT_H
#include "simdjson/simd_input.h"
#include "../simd_input.h"
#ifdef IS_X86_64
@ -42,4 +42,4 @@ struct simd_input<Architecture::HASWELL> {
UNTARGET_REGION
#endif // IS_X86_64
#endif // SIMDJSON_SIMD_INPUT_HASWELL_H
#endif // SIMDJSON_HASWELL_SIMD_INPUT_H


@ -1,8 +1,8 @@
#ifndef SIMDJSON_SIMDUTF8CHECK_HASWELL_H
#define SIMDJSON_SIMDUTF8CHECK_HASWELL_H
#ifndef SIMDJSON_HASWELL_SIMDUTF8CHECK_H
#define SIMDJSON_HASWELL_SIMDUTF8CHECK_H
#include "simdjson/portability.h"
#include "simdjson/simdutf8check.h"
#include "../simdutf8check.h"
#include <stddef.h>
#include <stdint.h>
#include <string.h>
@ -28,7 +28,8 @@
// all byte values must be no larger than 0xF4
TARGET_HASWELL
namespace simdjson {
namespace simdjson::haswell {
static inline __m256i push_last_byte_of_a_to_b(__m256i a, __m256i b) {
return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 15);
}
@ -192,6 +193,14 @@ avx_check_utf8_bytes(__m256i current_bytes,
return pb;
}
}; // namespace simdjson::haswell
UNTARGET_REGION // haswell
TARGET_HASWELL
namespace simdjson {
using namespace simdjson::haswell;
template <>
struct utf8_checker<Architecture::HASWELL> {
__m256i has_error;
@ -230,7 +239,7 @@ struct utf8_checker<Architecture::HASWELL> {
}
}; // struct utf8_checker
} // namespace simdjson
}; // namespace simdjson
UNTARGET_REGION // haswell
#endif // IS_X86_64


@ -0,0 +1,185 @@
#ifndef SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
#define SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
#include "simdjson/portability.h"
#ifdef IS_X86_64
#include "haswell/architecture.h"
#include "haswell/simd_input.h"
#include "haswell/simdutf8check.h"
#include "simdjson/stage1_find_marks.h"
TARGET_HASWELL
namespace simdjson::haswell {
static really_inline uint64_t compute_quote_mask(uint64_t quote_bits) {
// There should be no such thing as a processor supporting avx2
// but not clmul.
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0));
return quote_mask;
}
static really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
uint64_t &whitespace, uint64_t &structurals) {
#ifdef SIMDJSON_NAIVE_STRUCTURAL
// You should never need this naive approach, but it can be useful
// for research purposes
const __m256i mask_open_brace = _mm256_set1_epi8(0x7b);
__m256i struct_lo = _mm256_cmpeq_epi8(in.lo, mask_open_brace);
__m256i struct_hi = _mm256_cmpeq_epi8(in.hi, mask_open_brace);
const __m256i mask_close_brace = _mm256_set1_epi8(0x7d);
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_close_brace));
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_close_brace));
const __m256i mask_open_bracket = _mm256_set1_epi8(0x5b);
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_open_bracket));
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_open_bracket));
const __m256i mask_close_bracket = _mm256_set1_epi8(0x5d);
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_close_bracket));
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_close_bracket));
const __m256i mask_column = _mm256_set1_epi8(0x3a);
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_column));
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_column));
const __m256i mask_comma = _mm256_set1_epi8(0x2c);
struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_comma));
struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_comma));
uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(struct_lo));
uint64_t structural_res_1 = _mm256_movemask_epi8(struct_hi);
structurals = (structural_res_0 | (structural_res_1 << 32));
const __m256i mask_space = _mm256_set1_epi8(0x20);
__m256i space_lo = _mm256_cmpeq_epi8(in.lo, mask_space);
__m256i space_hi = _mm256_cmpeq_epi8(in.hi, mask_space);
const __m256i mask_linefeed = _mm256_set1_epi8(0x0a);
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_linefeed));
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_linefeed));
const __m256i mask_tab = _mm256_set1_epi8(0x09);
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_tab));
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_tab));
const __m256i mask_carriage = _mm256_set1_epi8(0x0d);
space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_carriage));
space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_carriage));
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(space_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(space_hi);
whitespace = (ws_res_0 | (ws_res_1 << 32));
// end of naive approach
#else // SIMDJSON_NAIVE_STRUCTURAL
// clang-format off
const __m256i structural_table =
_mm256_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123,
44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
const __m256i white_table = _mm256_setr_epi8(
32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100,
32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100);
// clang-format on
const __m256i struct_offset = _mm256_set1_epi8(0xd4u);
const __m256i struct_mask = _mm256_set1_epi8(32);
__m256i lo_white = _mm256_cmpeq_epi8(in.lo, _mm256_shuffle_epi8(white_table, in.lo));
__m256i hi_white = _mm256_cmpeq_epi8(in.hi, _mm256_shuffle_epi8(white_table, in.hi));
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(lo_white));
uint64_t ws_res_1 = _mm256_movemask_epi8(hi_white);
whitespace = (ws_res_0 | (ws_res_1 << 32));
__m256i lo_struct_r1 = _mm256_add_epi8(struct_offset, in.lo);
__m256i hi_struct_r1 = _mm256_add_epi8(struct_offset, in.hi);
__m256i lo_struct_r2 = _mm256_or_si256(in.lo, struct_mask);
__m256i hi_struct_r2 = _mm256_or_si256(in.hi, struct_mask);
__m256i lo_struct_r3 = _mm256_shuffle_epi8(structural_table, lo_struct_r1);
__m256i hi_struct_r3 = _mm256_shuffle_epi8(structural_table, hi_struct_r1);
__m256i lo_struct = _mm256_cmpeq_epi8(lo_struct_r2, lo_struct_r3);
__m256i hi_struct = _mm256_cmpeq_epi8(hi_struct_r2, hi_struct_r3);
uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(lo_struct));
uint64_t structural_res_1 = _mm256_movemask_epi8(hi_struct);
structurals = (structural_res_0 | (structural_res_1 << 32));
#endif // else SIMDJSON_NAIVE_STRUCTURAL
}
// flatten out values in 'bits' assuming that they are to have values of idx
// plus their position in the bitvector, and store these indexes at
// base_ptr[base] incrementing base as we go
// will potentially store extra values beyond end of valid bits, so base_ptr
// needs to be large enough to handle this
static really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
// In some instances, the next branch is expensive because it is mispredicted.
// Unfortunately, in other cases,
// it helps tremendously.
if (bits == 0)
return;
uint32_t cnt = _mm_popcnt_u64(bits);
uint32_t next_base = base + cnt;
idx -= 64;
base_ptr += base;
{
base_ptr[0] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[1] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[2] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[3] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[4] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[5] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[6] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[7] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr += 8;
}
// We hope that the next branch is easily predicted.
if (cnt > 8) {
base_ptr[0] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[1] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[2] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[3] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[4] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[5] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[6] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr[7] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr += 8;
}
if (cnt > 16) { // unlucky: we rarely get here
// since it means having one structural or pseudo-structural element
// every 4 characters (possible with inputs like "","","",...).
do {
base_ptr[0] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
base_ptr++;
} while (bits != 0);
}
base = next_base;
}
#include "generic/stage1_find_marks.h"
} // namespace haswell
UNTARGET_REGION
TARGET_HASWELL
namespace simdjson {
template <>
int find_structural_bits<Architecture::HASWELL>(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
return haswell::find_structural_bits(buf, len, pj);
}
} // namespace simdjson
UNTARGET_REGION
#endif // IS_X86_64
#endif // SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
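
The TARGET_HASWELL / UNTARGET_REGION pair around the namespace is what lets one binary carry several instruction-set variants of the same stage. A rough, assumed illustration of the idea using GCC/Clang target attributes plus runtime CPU detection (not the actual TARGET_HASWELL definition, which lives in the portability header):

#include <cstddef>
#include <cstdint>
#include <cstdio>

#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
// Compile just this function for AVX2/BMI even if the rest of the translation
// unit targets baseline x86-64; callers must verify CPU support at runtime.
__attribute__((target("avx2,bmi,pclmul")))
static int stage1_haswell(const uint8_t *buf, size_t len) {
  (void)buf;
  return static_cast<int>(len); // stand-in for the AVX2 implementation
}
#endif

static int stage1_fallback(const uint8_t *buf, size_t len) {
  (void)buf;
  return static_cast<int>(len); // portable path
}

int main() {
  const uint8_t doc[2] = {'{', '}'};
#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
  if (__builtin_cpu_supports("avx2")) {   // runtime ISA detection
    std::printf("%d\n", stage1_haswell(doc, 2));
    return 0;
  }
#endif
  std::printf("%d\n", stage1_fallback(doc, 2));
  return 0;
}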


@ -0,0 +1,34 @@
#ifndef SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
#define SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
#include "simdjson/portability.h"
#ifdef IS_X86_64
#include "simdjson/stage2_build_tape.h"
#include "haswell/architecture.h"
#include "haswell/stringparsing.h"
TARGET_HASWELL
namespace simdjson::haswell {
#include "generic/stage2_build_tape.h"
} // namespace simdjson::haswell
UNTARGET_REGION
TARGET_HASWELL
namespace simdjson {
template <>
WARN_UNUSED int
unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj) {
return haswell::unified_machine(buf, len, pj);
}
} // namespace simdjson
UNTARGET_REGION
#endif // IS_X86_64
#endif // SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H


@ -1,15 +1,16 @@
#ifndef SIMDJSON_STRINGPARSING_HASWELL_H
#define SIMDJSON_STRINGPARSING_HASWELL_H
#ifndef SIMDJSON_HASWELL_STRINGPARSING_H
#define SIMDJSON_HASWELL_STRINGPARSING_H
#include "simdjson/stringparsing.h"
#include "../stringparsing.h"
#ifdef IS_X86_64
#include "haswell/architecture.h"
TARGET_HASWELL
namespace simdjson {
template <>
really_inline parse_string_helper
find_bs_bits_and_quote_bits<Architecture::HASWELL>(const uint8_t *src,
uint8_t *dst) {
namespace simdjson::haswell {
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
// this can read up to 31 bytes beyond the buffer size, but we require
// SIMDJSON_PADDING of padding
static_assert(sizeof(__m256i) - 1 <= SIMDJSON_PADDING);
@ -24,14 +25,11 @@ find_bs_bits_and_quote_bits<Architecture::HASWELL>(const uint8_t *src,
static_cast<uint32_t>(_mm256_movemask_epi8(quote_mask)) // quote_bits
};
}
} // namespace simdjson
UNTARGET_REGION
#define TARGETED_ARCHITECTURE Architecture::HASWELL
#define TARGETED_REGION TARGET_HASWELL
#include "simdjson/stringparsing_common.h"
#undef TARGETED_ARCHITECTURE
#undef TARGETED_REGION
#include "generic/stringparsing.h"
} // namespace simdjson::haswell
UNTARGET_REGION
#endif // IS_X86_64


@ -2,7 +2,7 @@
#define SIMDJSON_SIMDUTF8CHECK_H
#include "simdjson/simdjson.h"
#include "simdjson/simd_input.h"
#include "simd_input.h"
namespace simdjson {


@ -1,34 +1,18 @@
#include "simdjson/stage1_find_marks.h"
#include "simdjson/portability.h"
#ifdef IS_X86_64
namespace {
// for when clmul is unavailable
[[maybe_unused]] uint64_t portable_compute_quote_mask(uint64_t quote_bits) {
uint64_t quote_mask = quote_bits ^ (quote_bits << 1);
quote_mask = quote_mask ^ (quote_mask << 2);
quote_mask = quote_mask ^ (quote_mask << 4);
quote_mask = quote_mask ^ (quote_mask << 8);
quote_mask = quote_mask ^ (quote_mask << 16);
quote_mask = quote_mask ^ (quote_mask << 32);
return quote_mask;
}
} // namespace
#define TARGETED_ARCHITECTURE Architecture::HASWELL
#define TARGETED_REGION TARGET_HASWELL
#include "simdjson/stage1_find_marks_flatten_haswell.h"
#include "simdjson/stage1_find_marks_haswell.h"
#include "simdjson/stage1_find_marks_common.h"
#undef TARGETED_ARCHITECTURE
#undef TARGETED_REGION
#define TARGETED_ARCHITECTURE Architecture::WESTMERE
#define TARGETED_REGION TARGET_WESTMERE
#include "simdjson/stage1_find_marks_flatten_common.h"
#include "simdjson/stage1_find_marks_westmere.h"
#include "simdjson/stage1_find_marks_common.h"
#undef TARGETED_ARCHITECTURE
#undef TARGETED_REGION
#endif // IS_X86_64
#ifdef IS_ARM64
#define TARGETED_ARCHITECTURE Architecture::ARM64
#define TARGETED_REGION TARGET_ARM64
#include "simdjson/stage1_find_marks_flatten_common.h"
#include "simdjson/stage1_find_marks_arm64.h"
#include "simdjson/stage1_find_marks_common.h"
#undef TARGETED_ARCHITECTURE
#undef TARGETED_REGION
#endif // IS_ARM64
#include "arm64/stage1_find_marks.h"
#include "haswell/stage1_find_marks.h"
#include "westmere/stage1_find_marks.h"

View File

@ -1,23 +1,3 @@
#include "simdjson/stage2_build_tape.h"
#ifdef IS_X86_64
#define TARGETED_ARCHITECTURE Architecture::HASWELL
#define TARGETED_REGION TARGET_HASWELL
#include "simdjson/stage2_build_tape_common.h"
#undef TARGETED_ARCHITECTURE
#undef TARGETED_REGION
#define TARGETED_ARCHITECTURE Architecture::WESTMERE
#define TARGETED_REGION TARGET_WESTMERE
#include "simdjson/stage2_build_tape_common.h"
#undef TARGETED_ARCHITECTURE
#undef TARGETED_REGION
#endif // IS_X86_64
#ifdef IS_ARM64
#define TARGETED_ARCHITECTURE Architecture::ARM64
#define TARGETED_REGION TARGET_ARM64
#include "simdjson/stage2_build_tape_common.h"
#undef TARGETED_ARCHITECTURE
#undef TARGETED_REGION
#endif // IS_ARM64
#include "arm64/stage2_build_tape.h"
#include "haswell/stage2_build_tape.h"
#include "westmere/stage2_build_tape.h"

View File

@ -12,6 +12,7 @@ void found_bad_string(const uint8_t *buf);
#endif
namespace simdjson {
// begin copypasta
// These chars yield themselves: " \ /
// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab
@ -84,22 +85,6 @@ struct parse_string_helper {
uint32_t quote_bits;
};
// Finds where the backslashes and quotes are located.
template <Architecture>
parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src,
uint8_t *dst);
template <Architecture T>
WARN_UNUSED
really_inline bool
parse_string(UNUSED const uint8_t *buf, UNUSED size_t len, ParsedJson &pj,
UNUSED const uint32_t depth, UNUSED uint32_t offset);
} // namespace simdjson
/// Now include the specializations:
#include "simdjson/stringparsing_arm64.h"
#include "simdjson/stringparsing_haswell.h"
#include "simdjson/stringparsing_westmere.h"
#endif
#endif // SIMDJSON_STRINGPARSING_H
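The "copypasta" block this hunk touches is the shared escape handling: backslash followed by ", \ or / yields the character itself, while b, f, n, r, t yield control characters. A minimal switch-based illustration of exactly the mapping described in that comment (simple_escape is a hypothetical helper, not the library's API, which may well be table-driven):

#include <cstdint>

// Maps the character following a backslash to its unescaped value; returns 0
// for sequences that need more work (\uXXXX) or are invalid escapes.
static inline uint8_t simple_escape(uint8_t c) {
  switch (c) {
  case '"':  return '"';
  case '\\': return '\\';
  case '/':  return '/';
  case 'b':  return 0x08; // backspace
  case 'f':  return 0x0c; // form feed
  case 'n':  return 0x0a; // newline
  case 'r':  return 0x0d; // carriage return
  case 't':  return 0x09; // horizontal tab
  default:   return 0;    // includes 'u' (unicode escape) and invalid escapes
  }
}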

View File

@ -0,0 +1,19 @@
#ifndef SIMDJSON_WESTMERE_ARCHITECTURE_H
#define SIMDJSON_WESTMERE_ARCHITECTURE_H
#include "simdjson/portability.h"
#ifdef IS_X86_64
#include "simdjson/simdjson.h"
namespace simdjson::westmere {
static const Architecture ARCHITECTURE = Architecture::WESTMERE;
} // namespace simdjson::westmere
#endif // IS_X86_64
#endif // SIMDJSON_WESTMERE_ARCHITECTURE_H

View File

@ -1,7 +1,7 @@
#ifndef SIMDJSON_SIMD_INPUT_WESTMERE_H
#define SIMDJSON_SIMD_INPUT_WESTMERE_H
#ifndef SIMDJSON_WESTMERE_SIMD_INPUT_H
#define SIMDJSON_WESTMERE_SIMD_INPUT_H
#include "simdjson/simd_input.h"
#include "../simd_input.h"
#ifdef IS_X86_64
@ -54,4 +54,4 @@ struct simd_input<Architecture::WESTMERE> {
UNTARGET_REGION
#endif // IS_X86_64
#endif // SIMDJSON_SIMD_INPUT_WESTMERE_H
#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H

View File

@ -1,8 +1,8 @@
#ifndef SIMDJSON_SIMDUTF8CHECK_WESTMERE_H
#define SIMDJSON_SIMDUTF8CHECK_WESTMERE_H
#ifndef SIMDJSON_WESTMERE_SIMDUTF8CHECK_H
#define SIMDJSON_WESTMERE_SIMDUTF8CHECK_H
#include "simdjson/portability.h"
#include "simdjson/simdutf8check.h"
#include "simdutf8check.h"
#include <stddef.h>
#include <stdint.h>
#include <string.h>
@ -29,8 +29,7 @@
/********** sse code **********/
TARGET_WESTMERE
namespace simdjson {
namespace simdjson::westmere {
// all byte values must be no larger than 0xF4
static inline void check_smaller_than_0xF4(__m128i current_bytes,
@ -164,6 +163,14 @@ check_utf8_bytes(__m128i current_bytes, struct processed_utf_bytes *previous,
return pb;
}
} // namespace simdjson::westmere
UNTARGET_REGION // westmere
TARGET_WESTMERE
namespace simdjson {
using namespace simdjson::westmere;
template <>
struct utf8_checker<Architecture::WESTMERE> {
__m128i has_error = _mm_setzero_si128();
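The utf8_checker shape visible here is branch-free error accumulation: each check_* helper folds any violation into has_error, and only once the whole input has been scanned is has_error tested. A scalar caricature of that structure, with hypothetical names and only the 0xF4 rule shown:

#include <cstdint>

// Accumulating checker in the same spirit as utf8_checker: individual checks
// never branch, they just OR their findings into `error`.
struct scalar_utf8_sketch {
  uint64_t error = 0;

  void check_smaller_than_0xF4(uint8_t byte) {
    // no byte in valid UTF-8 is larger than 0xF4
    error |= (byte > 0xF4) ? 1u : 0u;
  }

  bool errors() const { return error != 0; }
};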

View File

@ -1,30 +1,30 @@
#ifndef SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H
#define SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H
#ifndef SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
#define SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
#include "simdjson/simd_input_westmere.h"
#include "simdjson/simdutf8check_westmere.h"
#include "simdjson/stage1_find_marks.h"
#include "simdjson/portability.h"
#ifdef IS_X86_64
TARGET_WESTMERE
namespace simdjson {
#include "westmere/architecture.h"
#include "westmere/simd_input.h"
#include "westmere/simdutf8check.h"
#include "simdjson/stage1_find_marks.h"
template <>
really_inline uint64_t
compute_quote_mask<Architecture::WESTMERE>(uint64_t quote_bits) {
TARGET_WESTMERE
namespace simdjson::westmere {
static really_inline uint64_t compute_quote_mask(uint64_t quote_bits) {
return _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0));
}
template <>
really_inline void find_whitespace_and_structurals<Architecture::WESTMERE>(
simd_input<Architecture::WESTMERE> in, uint64_t &whitespace,
uint64_t &structurals) {
static really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
uint64_t &whitespace, uint64_t &structurals) {
const __m128i structural_table =
_mm_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
const __m128i white_table = _mm_setr_epi8(32, 100, 100, 100, 17, 100, 113, 2,
100, 9, 10, 112, 100, 13, 100, 100);
const __m128i struct_offset = _mm_set1_epi8(0xd4u);
const __m128i struct_mask = _mm_set1_epi8(32);
@ -66,11 +66,25 @@ really_inline void find_whitespace_and_structurals<Architecture::WESTMERE>(
uint64_t structural_res_3 = _mm_movemask_epi8(struct4);
structurals = (structural_res_0 | (structural_res_1 << 16) |
(structural_res_2 << 32) | (structural_res_3 << 48));
}
#include "generic/stage1_find_marks_flatten.h"
#include "generic/stage1_find_marks.h"
} // namespace westmere
UNTARGET_REGION
TARGET_WESTMERE
namespace simdjson {
template <>
int find_structural_bits<Architecture::WESTMERE>(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
return westmere::find_structural_bits(buf, len, pj);
}
} // namespace simdjson
UNTARGET_REGION
#endif // IS_X86_64
#endif // SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H
#endif // SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
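The structural_table / white_table lookups above are a SIMD way of classifying every byte of a block at once; stated per byte and in scalar terms, the classification stage 1 is after is simply the following (hypothetical helper names, shown only to make the tables' intent concrete):

// Scalar statement of what find_whitespace_and_structurals computes per byte.
// The SIMD version reaches the same answer for a whole block at a time via
// shuffle-table lookups and movemask.
static inline bool is_structural(unsigned char c) {
  return c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',';
}
static inline bool is_whitespace(unsigned char c) {
  return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}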

View File

@ -0,0 +1,34 @@
#ifndef SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
#define SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
#include "simdjson/portability.h"
#ifdef IS_X86_64
#include "simdjson/stage2_build_tape.h"
#include "westmere/architecture.h"
#include "westmere/stringparsing.h"
TARGET_WESTMERE
namespace simdjson::westmere {
#include "generic/stage2_build_tape.h"
} // namespace simdjson::westmere
UNTARGET_REGION
TARGET_WESTMERE
namespace simdjson {
template <>
WARN_UNUSED int
unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj) {
return westmere::unified_machine(buf, len, pj);
}
} // namespace simdjson
UNTARGET_REGION
#endif // IS_X86_64
#endif // SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H

View File

@ -1,15 +1,16 @@
#ifndef SIMDJSON_STRINGPARSING_WESTMERE_H
#define SIMDJSON_STRINGPARSING_WESTMERE_H
#ifndef SIMDJSON_WESTMERE_STRINGPARSING_H
#define SIMDJSON_WESTMERE_STRINGPARSING_H
#include "simdjson/stringparsing.h"
#include "../stringparsing.h"
#ifdef IS_X86_64
#include "westmere/architecture.h"
TARGET_WESTMERE
namespace simdjson {
template <>
really_inline parse_string_helper
find_bs_bits_and_quote_bits<Architecture::WESTMERE>(const uint8_t *src,
uint8_t *dst) {
namespace simdjson::westmere {
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
// this can read up to 31 bytes beyond the buffer size, but we require
// SIMDJSON_PADDING of padding
__m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
@ -23,14 +24,11 @@ find_bs_bits_and_quote_bits<Architecture::WESTMERE>(const uint8_t *src,
static_cast<uint32_t>(_mm_movemask_epi8(quote_mask)) // quote_bits
};
}
} // namespace simdjson
UNTARGET_REGION
#define TARGETED_ARCHITECTURE Architecture::WESTMERE
#define TARGETED_REGION TARGET_WESTMERE
#include "simdjson/stringparsing_common.h"
#undef TARGETED_ARCHITECTURE
#undef TARGETED_REGION
#include "generic/stringparsing.h"
} // namespace simdjson::westmere
UNTARGET_REGION
#endif // IS_X86_64