Use direct call to templated flatten_bits instead of if (#262)
* Use direct call to templated flatten_bits instead of if * Put really_inline back on find_structural_bits_64
This commit is contained in:
parent
1e26859bb7
commit
f3c3afd4cd
2
Makefile
2
Makefile
|
@ -62,7 +62,7 @@ TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck pointercheck
|
|||
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile allparsingcompetition
|
||||
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
|
||||
|
||||
HEADERS= include/simdjson/simdutf8check_haswell.h include/simdjson/simdutf8check_westmere.h include/simdjson/simdutf8check_arm64.h include/simdjson/stringparsing.h include/simdjson/stringparsing_arm64.h include/simdjson/stringparsing_haswell.h include/simdjson/stringparsing_westmere.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage1_find_marks_arm64.h include/simdjson/stage1_find_marks_haswell.h include/simdjson/stage1_find_marks_westmere.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h include/simdjson/stage1_find_marks_flatten.h include/simdjson/stage1_find_marks_flatten_haswell.h
|
||||
HEADERS= include/simdjson/simdutf8check_haswell.h include/simdjson/simdutf8check_westmere.h include/simdjson/simdutf8check_arm64.h include/simdjson/stringparsing.h include/simdjson/stringparsing_arm64.h include/simdjson/stringparsing_haswell.h include/simdjson/stringparsing_westmere.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage1_find_marks_arm64.h include/simdjson/stage1_find_marks_haswell.h include/simdjson/stage1_find_marks_westmere.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h include/simdjson/stage1_find_marks_flatten_common.h include/simdjson/stage1_find_marks_flatten_haswell.h
|
||||
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/parsedjson.cpp src/parsedjsoniterator.cpp
|
||||
MINIFIERHEADERS=include/simdjson/jsonminifier.h include/simdjson/simdprune_tables.h
|
||||
MINIFIERLIBFILES=src/jsonminifier.cpp
|
||||
|
|
|
@ -17,6 +17,7 @@ $SCRIPTPATH/src/simdjson.cpp
|
|||
$SCRIPTPATH/src/jsonioutil.cpp
|
||||
$SCRIPTPATH/src/jsonminifier.cpp
|
||||
$SCRIPTPATH/src/jsonparser.cpp
|
||||
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten_haswell.h
|
||||
$SCRIPTPATH/src/stage1_find_marks.cpp
|
||||
$SCRIPTPATH/src/stage2_build_tape.cpp
|
||||
$SCRIPTPATH/src/parsedjson.cpp
|
||||
|
@ -41,8 +42,6 @@ $SCRIPTPATH/include/simdjson/simdutf8check_arm64.h
|
|||
$SCRIPTPATH/include/simdjson/jsonminifier.h
|
||||
$SCRIPTPATH/include/simdjson/parsedjson.h
|
||||
$SCRIPTPATH/include/simdjson/stage1_find_marks.h
|
||||
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten.h
|
||||
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten_haswell.h
|
||||
$SCRIPTPATH/include/simdjson/stage1_find_marks_westmere.h
|
||||
$SCRIPTPATH/include/simdjson/stage1_find_marks_haswell.h
|
||||
$SCRIPTPATH/include/simdjson/stage1_find_marks_arm64.h
|
||||
|
|
|
@ -20,7 +20,7 @@ set(SIMDJSON_INCLUDE
|
|||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_arm64.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_common.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten_common.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten_haswell.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_haswell.h
|
||||
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_westmere.h
|
||||
|
|
|
@ -116,6 +116,15 @@ int find_structural_bits(const char *buf, size_t len,
|
|||
return find_structural_bits((const uint8_t *)buf, len, pj);
|
||||
}
|
||||
|
||||
// flatten out values in 'bits' assuming that they are to have values of idx
|
||||
// plus their position in the bitvector, and store these indexes at
|
||||
// base_ptr[base] incrementing base as we go
|
||||
// will potentially store extra values beyond end of valid bits, so base_ptr
|
||||
// needs to be large enough to handle this
|
||||
template <Architecture T = Architecture::NATIVE>
|
||||
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
|
||||
uint32_t idx, uint64_t bits);
|
||||
|
||||
} // namespace simdjson
|
||||
|
||||
#endif
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
#include "simdjson/simdutf8check_arm64.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage1_find_marks_flatten.h"
|
||||
|
||||
#ifdef IS_ARM64
|
||||
namespace simdjson {
|
||||
|
|
|
@ -92,7 +92,7 @@ really_inline uint64_t find_quote_mask_and_bits<TARGETED_ARCHITECTURE>(
|
|||
}
|
||||
|
||||
// Find structural bits in a 64-byte chunk.
|
||||
void find_structural_bits_64(
|
||||
really_inline void find_structural_bits_64(
|
||||
const uint8_t *buf, size_t idx, uint32_t *base_ptr, uint32_t &base,
|
||||
uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote,
|
||||
uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals,
|
||||
|
@ -113,12 +113,7 @@ void find_structural_bits_64(
|
|||
/* take the previous iterations structural bits, not our current
|
||||
* iteration,
|
||||
* and flatten */
|
||||
#ifdef IS_X86_64
|
||||
if (TARGETED_ARCHITECTURE == Architecture::HASWELL)
|
||||
simdjson::haswell::flatten_bits(base_ptr, base, idx, structurals);
|
||||
else
|
||||
#endif
|
||||
simdjson::flatten_bits(base_ptr, base, idx, structurals);
|
||||
flatten_bits<TARGETED_ARCHITECTURE>(base_ptr, base, idx, structurals);
|
||||
|
||||
uint64_t whitespace;
|
||||
find_whitespace_and_structurals<TARGETED_ARCHITECTURE>(in, whitespace,
|
||||
|
@ -200,12 +195,7 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
|
|||
|
||||
/* finally, flatten out the remaining structurals from the last iteration
|
||||
*/
|
||||
#ifdef IS_X86_64
|
||||
if (TARGETED_ARCHITECTURE == Architecture::HASWELL)
|
||||
simdjson::haswell::flatten_bits(base_ptr, base, idx, structurals);
|
||||
else
|
||||
#endif
|
||||
simdjson::flatten_bits(base_ptr, base, idx, structurals);
|
||||
flatten_bits<TARGETED_ARCHITECTURE>(base_ptr, base, idx, structurals);
|
||||
|
||||
pj.n_structural_indexes = base;
|
||||
/* a valid JSON file cannot have zero structural indexes - we should have
|
||||
|
|
|
@ -1,6 +1,12 @@
|
|||
#ifndef SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
|
||||
#define SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
|
||||
// This file contains a non-architecture-specific version of "flatten" used in stage1.
|
||||
// It is intended to be included multiple times and compiled multiple times
|
||||
// We assume the file in which it is included already includes
|
||||
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
|
||||
|
||||
#ifdef TARGETED_ARCHITECTURE
|
||||
#ifdef TARGETED_REGION
|
||||
|
||||
TARGETED_REGION
|
||||
namespace simdjson {
|
||||
|
||||
#ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking
|
||||
|
@ -8,8 +14,9 @@ namespace simdjson {
|
|||
// This is just a naive implementation. It should be normally
|
||||
// disabled, but can be used for research purposes to compare
|
||||
// against our optimized version.
|
||||
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
|
||||
uint32_t idx, uint64_t bits) {
|
||||
template <>
|
||||
really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint32_t &base,
|
||||
uint32_t idx, uint64_t bits) {
|
||||
uint32_t *out_ptr = base_ptr + base;
|
||||
idx -= 64;
|
||||
while (bits != 0) {
|
||||
|
@ -26,8 +33,9 @@ really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
|
|||
// base_ptr[base] incrementing base as we go
|
||||
// will potentially store extra values beyond end of valid bits, so base_ptr
|
||||
// needs to be large enough to handle this
|
||||
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
|
||||
uint32_t idx, uint64_t bits) {
|
||||
template<>
|
||||
really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint32_t &base,
|
||||
uint32_t idx, uint64_t bits) {
|
||||
// In some instances, the next branch is expensive because it is mispredicted.
|
||||
// Unfortunately, in other cases,
|
||||
// it helps tremendously.
|
||||
|
@ -88,6 +96,13 @@ really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
|
|||
base = next_base;
|
||||
}
|
||||
#endif // SIMDJSON_NAIVE_FLATTEN
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
|
||||
#else
|
||||
#error TARGETED_REGION must be specified before including.
|
||||
#endif // TARGETED_REGION
|
||||
#else
|
||||
#error TARGETED_ARCHITECTURE must be specified before including.
|
||||
#endif // TARGETED_ARCHITECTURE
|
|
@ -1,8 +1,5 @@
|
|||
#ifndef SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_HASWELL_H
|
||||
#define SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_HASWELL_H
|
||||
|
||||
// This file provides the same function as
|
||||
// stage1_find_marks_flatten.h, but uses Intel intrinsics.
|
||||
// stage1_find_marks_flatten_common.h, but uses Intel intrinsics.
|
||||
// This should provide better performance on Visual Studio
|
||||
// and other compilers that do a conservative optimization.
|
||||
|
||||
|
@ -20,15 +17,15 @@
|
|||
|
||||
TARGET_HASWELL
|
||||
namespace simdjson {
|
||||
namespace haswell {
|
||||
|
||||
// flatten out values in 'bits' assuming that they are to have values of idx
|
||||
// plus their position in the bitvector, and store these indexes at
|
||||
// base_ptr[base] incrementing base as we go
|
||||
// will potentially store extra values beyond end of valid bits, so base_ptr
|
||||
// needs to be large enough to handle this
|
||||
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
|
||||
uint32_t idx, uint64_t bits) {
|
||||
template<>
|
||||
really_inline void flatten_bits<Architecture::HASWELL>(uint32_t *base_ptr, uint32_t &base,
|
||||
uint32_t idx, uint64_t bits) {
|
||||
// In some instances, the next branch is expensive because it is mispredicted.
|
||||
// Unfortunately, in other cases,
|
||||
// it helps tremendously.
|
||||
|
@ -88,8 +85,6 @@ really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
|
|||
}
|
||||
base = next_base;
|
||||
}
|
||||
} // namespace haswell
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
#endif // IS_X86_64
|
||||
#endif // SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
#include "simdjson/simdutf8check_haswell.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage1_find_marks_flatten_haswell.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
#include "simdjson/simdutf8check_westmere.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage1_find_marks_flatten.h"
|
||||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
|
|
|
@ -3,17 +3,18 @@
|
|||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
#include "simdjson/stage1_find_marks_haswell.h"
|
||||
#include "simdjson/stage1_find_marks_westmere.h"
|
||||
|
||||
#define TARGETED_ARCHITECTURE Architecture::HASWELL
|
||||
#define TARGETED_REGION TARGET_HASWELL
|
||||
#include "simdjson/stage1_find_marks_flatten_haswell.h"
|
||||
#include "simdjson/stage1_find_marks_haswell.h"
|
||||
#include "simdjson/stage1_find_marks_common.h"
|
||||
#undef TARGETED_ARCHITECTURE
|
||||
#undef TARGETED_REGION
|
||||
|
||||
#define TARGETED_ARCHITECTURE Architecture::WESTMERE
|
||||
#define TARGETED_REGION TARGET_WESTMERE
|
||||
#include "simdjson/stage1_find_marks_flatten_common.h"
|
||||
#include "simdjson/stage1_find_marks_westmere.h"
|
||||
#include "simdjson/stage1_find_marks_common.h"
|
||||
#undef TARGETED_ARCHITECTURE
|
||||
#undef TARGETED_REGION
|
||||
|
@ -22,10 +23,10 @@
|
|||
|
||||
#ifdef IS_ARM64
|
||||
|
||||
#include "simdjson/stage1_find_marks_arm64.h"
|
||||
|
||||
#define TARGETED_ARCHITECTURE Architecture::ARM64
|
||||
#define TARGETED_REGION TARGET_ARM64
|
||||
#include "simdjson/stage1_find_marks_flatten_common.h"
|
||||
#include "simdjson/stage1_find_marks_arm64.h"
|
||||
#include "simdjson/stage1_find_marks_common.h"
|
||||
#undef TARGETED_ARCHITECTURE
|
||||
#undef TARGETED_REGION
|
||||
|
|
Loading…
Reference in New Issue