Use direct call to templated flatten_bits instead of if (#262)

* Use direct call to templated flatten_bits instead of if

* Put really_inline back on find_structural_bits_64
This commit is contained in:
John Keiser 2019-08-08 12:09:17 -07:00 committed by Daniel Lemire
parent 1e26859bb7
commit f3c3afd4cd
11 changed files with 48 additions and 42 deletions

View File

@ -62,7 +62,7 @@ TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck pointercheck
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile allparsingcompetition
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
HEADERS= include/simdjson/simdutf8check_haswell.h include/simdjson/simdutf8check_westmere.h include/simdjson/simdutf8check_arm64.h include/simdjson/stringparsing.h include/simdjson/stringparsing_arm64.h include/simdjson/stringparsing_haswell.h include/simdjson/stringparsing_westmere.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage1_find_marks_arm64.h include/simdjson/stage1_find_marks_haswell.h include/simdjson/stage1_find_marks_westmere.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h include/simdjson/stage1_find_marks_flatten.h include/simdjson/stage1_find_marks_flatten_haswell.h
HEADERS= include/simdjson/simdutf8check_haswell.h include/simdjson/simdutf8check_westmere.h include/simdjson/simdutf8check_arm64.h include/simdjson/stringparsing.h include/simdjson/stringparsing_arm64.h include/simdjson/stringparsing_haswell.h include/simdjson/stringparsing_westmere.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage1_find_marks_arm64.h include/simdjson/stage1_find_marks_haswell.h include/simdjson/stage1_find_marks_westmere.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h include/simdjson/stage1_find_marks_flatten_common.h include/simdjson/stage1_find_marks_flatten_haswell.h
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/parsedjson.cpp src/parsedjsoniterator.cpp
MINIFIERHEADERS=include/simdjson/jsonminifier.h include/simdjson/simdprune_tables.h
MINIFIERLIBFILES=src/jsonminifier.cpp

View File

@ -17,6 +17,7 @@ $SCRIPTPATH/src/simdjson.cpp
$SCRIPTPATH/src/jsonioutil.cpp
$SCRIPTPATH/src/jsonminifier.cpp
$SCRIPTPATH/src/jsonparser.cpp
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten_haswell.h
$SCRIPTPATH/src/stage1_find_marks.cpp
$SCRIPTPATH/src/stage2_build_tape.cpp
$SCRIPTPATH/src/parsedjson.cpp
@ -41,8 +42,6 @@ $SCRIPTPATH/include/simdjson/simdutf8check_arm64.h
$SCRIPTPATH/include/simdjson/jsonminifier.h
$SCRIPTPATH/include/simdjson/parsedjson.h
$SCRIPTPATH/include/simdjson/stage1_find_marks.h
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten.h
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten_haswell.h
$SCRIPTPATH/include/simdjson/stage1_find_marks_westmere.h
$SCRIPTPATH/include/simdjson/stage1_find_marks_haswell.h
$SCRIPTPATH/include/simdjson/stage1_find_marks_arm64.h

View File

@ -20,7 +20,7 @@ set(SIMDJSON_INCLUDE
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks.h
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_arm64.h
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_common.h
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten.h
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten_common.h
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten_haswell.h
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_haswell.h
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_westmere.h

View File

@ -116,6 +116,15 @@ int find_structural_bits(const char *buf, size_t len,
return find_structural_bits((const uint8_t *)buf, len, pj);
}
// Flatten out the values in 'bits', assuming that they are to have values of
// idx plus their position in the bitvector, and store these indexes at
// base_ptr[base], incrementing base as we go.
// This will potentially store extra values beyond the end of the valid bits,
// so base_ptr needs to be large enough to handle this.
// Only the declaration lives here: the definitions are explicit
// specializations on Architecture, supplied by the
// stage1_find_marks_flatten_*.h headers.
template <Architecture T = Architecture::NATIVE>
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
uint32_t idx, uint64_t bits);
} // namespace simdjson
#endif

View File

@ -3,7 +3,6 @@
#include "simdjson/simdutf8check_arm64.h"
#include "simdjson/stage1_find_marks.h"
#include "simdjson/stage1_find_marks_flatten.h"
#ifdef IS_ARM64
namespace simdjson {

View File

@ -92,7 +92,7 @@ really_inline uint64_t find_quote_mask_and_bits<TARGETED_ARCHITECTURE>(
}
// Find structural bits in a 64-byte chunk.
void find_structural_bits_64(
really_inline void find_structural_bits_64(
const uint8_t *buf, size_t idx, uint32_t *base_ptr, uint32_t &base,
uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote,
uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals,
@ -113,12 +113,7 @@ void find_structural_bits_64(
/* take the previous iterations structural bits, not our current
* iteration,
* and flatten */
#ifdef IS_X86_64
if (TARGETED_ARCHITECTURE == Architecture::HASWELL)
simdjson::haswell::flatten_bits(base_ptr, base, idx, structurals);
else
#endif
simdjson::flatten_bits(base_ptr, base, idx, structurals);
flatten_bits<TARGETED_ARCHITECTURE>(base_ptr, base, idx, structurals);
uint64_t whitespace;
find_whitespace_and_structurals<TARGETED_ARCHITECTURE>(in, whitespace,
@ -200,12 +195,7 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
/* finally, flatten out the remaining structurals from the last iteration
*/
#ifdef IS_X86_64
if (TARGETED_ARCHITECTURE == Architecture::HASWELL)
simdjson::haswell::flatten_bits(base_ptr, base, idx, structurals);
else
#endif
simdjson::flatten_bits(base_ptr, base, idx, structurals);
flatten_bits<TARGETED_ARCHITECTURE>(base_ptr, base, idx, structurals);
pj.n_structural_indexes = base;
/* a valid JSON file cannot have zero structural indexes - we should have

View File

@ -1,6 +1,12 @@
#ifndef SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
#define SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
// This file contains a non-architecture-specific version of "flatten" used in stage1.
// It is intended to be included multiple times and compiled multiple times
// We assume the file in which it is included already includes
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
#ifdef TARGETED_ARCHITECTURE
#ifdef TARGETED_REGION
TARGETED_REGION
namespace simdjson {
#ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking
@ -8,8 +14,9 @@ namespace simdjson {
// This is just a naive implementation. It should normally be
// disabled, but can be used for research purposes to compare
// against our optimized version.
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
uint32_t idx, uint64_t bits) {
template <>
really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint32_t &base,
uint32_t idx, uint64_t bits) {
uint32_t *out_ptr = base_ptr + base;
idx -= 64;
while (bits != 0) {
@ -26,8 +33,9 @@ really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
// base_ptr[base] incrementing base as we go
// will potentially store extra values beyond end of valid bits, so base_ptr
// needs to be large enough to handle this
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
uint32_t idx, uint64_t bits) {
template<>
really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint32_t &base,
uint32_t idx, uint64_t bits) {
// In some instances, the next branch is expensive because it is mispredicted.
// Unfortunately, in other cases,
// it helps tremendously.
@ -88,6 +96,13 @@ really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
base = next_base;
}
#endif // SIMDJSON_NAIVE_FLATTEN
} // namespace simdjson
#endif // SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
} // namespace simdjson
UNTARGET_REGION
#else
#error TARGETED_REGION must be specified before including.
#endif // TARGETED_REGION
#else
#error TARGETED_ARCHITECTURE must be specified before including.
#endif // TARGETED_ARCHITECTURE

View File

@ -1,8 +1,5 @@
#ifndef SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_HASWELL_H
#define SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_HASWELL_H
// This file provides the same function as
// stage1_find_marks_flatten.h, but uses Intel intrinsics.
// stage1_find_marks_flatten_common.h, but uses Intel intrinsics.
// This should provide better performance on Visual Studio
// and other compilers that do a conservative optimization.
@ -20,15 +17,15 @@
TARGET_HASWELL
namespace simdjson {
namespace haswell {
// flatten out values in 'bits' assuming that they are to have values of idx
// plus their position in the bitvector, and store these indexes at
// base_ptr[base] incrementing base as we go
// will potentially store extra values beyond end of valid bits, so base_ptr
// needs to be large enough to handle this
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
uint32_t idx, uint64_t bits) {
template<>
really_inline void flatten_bits<Architecture::HASWELL>(uint32_t *base_ptr, uint32_t &base,
uint32_t idx, uint64_t bits) {
// In some instances, the next branch is expensive because it is mispredicted.
// Unfortunately, in other cases,
// it helps tremendously.
@ -88,8 +85,6 @@ really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
}
base = next_base;
}
} // namespace haswell
} // namespace simdjson
UNTARGET_REGION
#endif // IS_X86_64
#endif // SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H

View File

@ -3,7 +3,6 @@
#include "simdjson/simdutf8check_haswell.h"
#include "simdjson/stage1_find_marks.h"
#include "simdjson/stage1_find_marks_flatten_haswell.h"
#ifdef IS_X86_64

View File

@ -3,7 +3,6 @@
#include "simdjson/simdutf8check_westmere.h"
#include "simdjson/stage1_find_marks.h"
#include "simdjson/stage1_find_marks_flatten.h"
#ifdef IS_X86_64

View File

@ -3,17 +3,18 @@
#ifdef IS_X86_64
#include "simdjson/stage1_find_marks_haswell.h"
#include "simdjson/stage1_find_marks_westmere.h"
#define TARGETED_ARCHITECTURE Architecture::HASWELL
#define TARGETED_REGION TARGET_HASWELL
#include "simdjson/stage1_find_marks_flatten_haswell.h"
#include "simdjson/stage1_find_marks_haswell.h"
#include "simdjson/stage1_find_marks_common.h"
#undef TARGETED_ARCHITECTURE
#undef TARGETED_REGION
#define TARGETED_ARCHITECTURE Architecture::WESTMERE
#define TARGETED_REGION TARGET_WESTMERE
#include "simdjson/stage1_find_marks_flatten_common.h"
#include "simdjson/stage1_find_marks_westmere.h"
#include "simdjson/stage1_find_marks_common.h"
#undef TARGETED_ARCHITECTURE
#undef TARGETED_REGION
@ -22,10 +23,10 @@
#ifdef IS_ARM64
#include "simdjson/stage1_find_marks_arm64.h"
#define TARGETED_ARCHITECTURE Architecture::ARM64
#define TARGETED_REGION TARGET_ARM64
#include "simdjson/stage1_find_marks_flatten_common.h"
#include "simdjson/stage1_find_marks_arm64.h"
#include "simdjson/stage1_find_marks_common.h"
#undef TARGETED_ARCHITECTURE
#undef TARGETED_REGION