faster minifier (#568)
* Fallback should use our scalar code.
* parse should have a nicer error message.
* Making it so that "minify" can use different architectures.
* Let us change the minifier competition so that it tests all implementations.
* Documenting the untaken optimization opportunity.

Co-authored-by: John Keiser <john@johnkeiser.com>
parent 293ec7aec5
commit 5d1e3efce8

Makefile (4 changed lines)
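In short: the old free function simdjson::json_minify (deleted at the bottom of this commit) is replaced by a per-architecture virtual on simdjson::implementation. Both signatures are copied from the hunks below, shown together here for orientation:

// Before: one free function; the accelerated path was AVX2-only and there was
// no runtime dispatch (see the deleted src/jsonminifier.cpp at the end).
size_t json_minify(const uint8_t *buf, size_t len, uint8_t *out);

// After: every implementation (haswell, westmere, arm64, fallback) overrides
// this; dst_len receives the number of bytes written.
WARN_UNUSED virtual error_code minify(const uint8_t *buf, size_t len,
                                      uint8_t *dst, size_t &dst_len) const noexcept = 0;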
@@ -53,7 +53,7 @@ endif # ifeq ($(SANITIZE),1)
endif # ifeq ($(MEMSANITIZE),1)

# Headers and sources
SRCHEADERS_GENERIC=src/generic/atomparsing.h src/generic/numberparsing.h src/generic/json_scanner.h src/generic/json_string_scanner.h src/generic/json_structural_indexer.h src/generic/stage2_build_tape.h src/generic/stringparsing.h src/generic/stage2_streaming_build_tape.h src/generic/utf8_fastvalidate_algorithm.h src/generic/utf8_lookup_algorithm.h src/generic/utf8_lookup2_algorithm.h src/generic/utf8_range_algorithm.h src/generic/utf8_zwegner_algorithm.h
SRCHEADERS_GENERIC=src/generic/atomparsing.h src/generic/numberparsing.h src/generic/json_scanner.h src/generic/json_string_scanner.h src/generic/json_structural_indexer.h src/generic/json_minifier.h src/generic/buf_block_reader.h src/generic/stage2_build_tape.h src/generic/stringparsing.h src/generic/stage2_streaming_build_tape.h src/generic/utf8_fastvalidate_algorithm.h src/generic/utf8_lookup_algorithm.h src/generic/utf8_lookup2_algorithm.h src/generic/utf8_range_algorithm.h src/generic/utf8_zwegner_algorithm.h
SRCHEADERS_ARM64= src/arm64/bitmanipulation.h src/arm64/bitmask.h src/arm64/intrinsics.h src/arm64/numberparsing.h src/arm64/simd.h src/arm64/stage1_find_marks.h src/arm64/stage2_build_tape.h src/arm64/stringparsing.h
SRCHEADERS_HASWELL= src/haswell/bitmanipulation.h src/haswell/bitmask.h src/haswell/intrinsics.h src/haswell/numberparsing.h src/haswell/simd.h src/haswell/stage1_find_marks.h src/haswell/stage2_build_tape.h src/haswell/stringparsing.h
SRCHEADERS_FALLBACK= src/fallback/bitmanipulation.h src/fallback/implementation.h src/fallback/numberparsing.h src/fallback/stage1_find_marks.h src/fallback/stage2_build_tape.h src/fallback/stringparsing.h

@@ -61,7 +61,7 @@ SRCHEADERS_WESTMERE=src/westmere/bitmanipulation.h src/westmere/bitmask.h src/we
SRCHEADERS_SRC=src/isadetection.h src/jsoncharutils.h src/simdprune_tables.h src/implementation.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/document_parser_callbacks.h
SRCHEADERS=$(SRCHEADERS_SRC) $(SRCHEADERS_GENERIC) $(SRCHEADERS_ARM64) $(SRCHEADERS_HASWELL) $(SRCHEADERS_WESTMERE) $(SRCHEADERS_FALLBACK)

INCLUDEHEADERS=include/simdjson.h include/simdjson/common_defs.h include/simdjson/internal/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/inline/padded_string.h include/simdjson/document.h include/simdjson/inline/document.h include/simdjson/document_iterator.h include/simdjson/inline/document_iterator.h include/simdjson/document_stream.h include/simdjson/inline/document_stream.h include/simdjson/implementation.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/inline/jsonstream.h include/simdjson/portability.h include/simdjson/error.h include/simdjson/inline/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h
INCLUDEHEADERS=include/simdjson.h include/simdjson/common_defs.h include/simdjson/internal/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/inline/padded_string.h include/simdjson/document.h include/simdjson/inline/document.h include/simdjson/document_iterator.h include/simdjson/inline/document_iterator.h include/simdjson/document_stream.h include/simdjson/inline/document_stream.h include/simdjson/implementation.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/inline/jsonstream.h include/simdjson/portability.h include/simdjson/error.h include/simdjson/inline/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h

ifeq ($(SIMDJSON_TEST_AMALGAMATED_HEADERS),1)
HEADERS=singleheader/simdjson.h
@@ -143,7 +143,7 @@ int main(int argc, char *argv[]) {
  // parse_many
  const char * filename2 = argv[2];
  for (auto result : parser.load_many(filename2)) {
    error = result.error;
    error = result.error();
  }
  if (error) {
    std::cout << "parse_many failed" << std::endl;
@@ -98,16 +98,14 @@ int main(int argc, char *argv[]) {
            "despacing with RapidJSON Insitu", rapid_stringme_insitu((char *)buffer),
            memcpy(buffer, p.data(), p.size()), repeat, volume, !just_data);
  memcpy(buffer, p.data(), p.size());

  size_t outlength = simdjson::json_minify((const uint8_t *)buffer, p.size(),
                                           (uint8_t *)buffer);
  if (verbose)
    std::cout << "json_minify length is " << outlength << std::endl;

  size_t outlength;
  uint8_t *cbuffer = (uint8_t *)buffer;
  BEST_TIME("json_minify", simdjson::json_minify(cbuffer, p.size(), cbuffer),
  for (auto imple : simdjson::available_implementations) {
    BEST_TIME((std::string("simdjson->minify+")+imple->name()).c_str(), (imple->minify(cbuffer, p.size(), cbuffer, outlength), outlength),
              outlength, memcpy(buffer, p.data(), p.size()), repeat, volume,
              !just_data);
  }

  printf("minisize = %zu, original size = %zu (minified down to %.2f percent "
         "of original) \n",
         outlength, p.size(), outlength * 100.0 / p.size());
@@ -121,8 +119,9 @@ int main(int argc, char *argv[]) {
            !just_data);

  char *mini_buffer = simdjson::internal::allocate_padded_buffer(p.size() + 1);
  size_t minisize = simdjson::json_minify((const uint8_t *)p.data(), p.size(),
                                          (uint8_t *)mini_buffer);
  size_t minisize;
  simdjson::active_implementation->minify((const uint8_t *)p.data(), p.size(),
                                          (uint8_t *)mini_buffer, minisize);
  mini_buffer[minisize] = '\0';

  BEST_TIME("RapidJSON Insitu despaced", d.ParseInsitu(buffer).HasParseError(),
@@ -171,6 +170,7 @@ int main(int argc, char *argv[]) {
            automated_reallocation),
            simdjson::SUCCESS, memcpy(buffer, mini_buffer, p.size()), repeat, volume,
            !just_data);

  free(buffer);
  free(ast_buffer);
  free(mini_buffer);
@@ -109,7 +109,12 @@ struct option_struct {
    case 'a': {
      const implementation *impl = simdjson::available_implementations[optarg];
      if (!impl) {
        exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a haswell, westmere or arm64");
        std::string exit_message = string("Unsupported option value -a ") + optarg + ": expected -a with one of ";
        for (auto imple : simdjson::available_implementations) {
          exit_message += imple->name();
          exit_message += " ";
        }
        exit_usage(exit_message);
      }
      simdjson::active_implementation = impl;
      break;
@@ -16,7 +16,6 @@ set(SIMDJSON_INCLUDE
  ${SIMDJSON_INCLUDE_DIR}/simdjson/inline/padded_string.h
  ${SIMDJSON_INCLUDE_DIR}/simdjson/internal/jsonformatutils.h
  ${SIMDJSON_INCLUDE_DIR}/simdjson/jsonioutil.h
  ${SIMDJSON_INCLUDE_DIR}/simdjson/jsonminifier.h
  ${SIMDJSON_INCLUDE_DIR}/simdjson/jsonparser.h
  ${SIMDJSON_INCLUDE_DIR}/simdjson/jsonstream.h
  ${SIMDJSON_INCLUDE_DIR}/simdjson/padded_string.h
@@ -10,7 +10,6 @@
#include "simdjson/implementation.h"
#include "simdjson/document.h"
#include "simdjson/document_stream.h"
#include "simdjson/jsonminifier.h"

// Deprecated API
#include "simdjson/parsedjsoniterator.h"
@@ -77,6 +77,17 @@ private:
 */
template<typename T>
struct simdjson_result : public std::pair<T, error_code> {
  /**
   * Move the value and the error to the provided variables.
   */
  void tie(T& t, error_code & e) {
    // on the clang compiler that comes with current macOS (Apple clang version 11.0.0),
    // tie(width, error) = size["w"].as_uint64_t();
    // fails with "error: no viable overloaded '='"
    t = std::move(this->first);
    e = std::move(this->second);
  }

  /**
   * The error.
   */

@@ -138,6 +149,7 @@ struct simdjson_move_result : std::pair<T, error_code> {
    t = std::move(this->first);
    e = std::move(this->second);
  }

  /**
   * The error.
   */
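Given the compiler limitation the comment describes, the call pattern this tie() member enables presumably looks like the following sketch (width, error and size are the names used in the comment above; this is an illustration, not a verbatim API example):

uint64_t width;
simdjson::error_code error;
// What fails on Apple clang 11: std::tie(width, error) = size["w"].as_uint64_t();
// The member function sidesteps the missing operator=:
size["w"].as_uint64_t().tie(width, error);
if (error) { /* handle the error */ }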
@@ -56,6 +56,19 @@ public:
   */
  WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept = 0;

  /**
   * Minify the JSON document.
   *
   * Overridden by each implementation.
   *
   * @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
   * @param len the length of the json document.
   * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
   * @param dst_len the number of bytes written. Output only.
   * @return the error code, or SUCCESS if there was no error.
   */
  WARN_UNUSED virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0;

  /**
   * Stage 1 of the document parser.
   *

@@ -182,6 +195,9 @@ public:
  WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final {
    return set_best()->parse(buf, len, parser);
  }
  WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final {
    return set_best()->minify(buf, len, dst, dst_len);
  }
  WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept final {
    return set_best()->stage1(buf, len, parser, streaming);
  }
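Because minify() is now a virtual on implementation, callers can rely on the set_best() runtime dispatch above, or pin an architecture explicitly — which is exactly what the -a option and the benchmark loop earlier in this diff do. A hedged sketch using only calls that appear in this commit (buf/dst are assumed padded as the documentation above requires):

#include "simdjson.h"

// Sketch: explicit architecture selection (what -a does) and iteration over
// all implementations (what the minifier competition now does).
void minify_with_each(const uint8_t *buf, size_t len, uint8_t *dst) {
  const simdjson::implementation *impl = simdjson::available_implementations["fallback"];
  if (impl) { simdjson::active_implementation = impl; }
  for (auto imple : simdjson::available_implementations) {
    size_t dst_len;
    auto error = imple->minify(buf, len, dst, dst_len);
    if (error) { /* e.g. UNSUPPORTED_ARCHITECTURE on the wrong CPU */ }
  }
}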
@@ -1,32 +0,0 @@
#ifndef SIMDJSON_JSONMINIFIER_H
#define SIMDJSON_JSONMINIFIER_H

#include "simdjson/padded_string.h"
#include <cstddef>
#include <cstdint>
#include <string_view>

namespace simdjson {

// Take input from buf and remove useless whitespace, write it to out; buf and
// out can be the same pointer. Result is null terminated,
// return the string length (minus the null termination).
// The accelerated version of this function only runs on AVX2 hardware.
size_t json_minify(const uint8_t *buf, size_t len, uint8_t *out);

static inline size_t json_minify(const char *buf, size_t len, char *out) {
  return json_minify(reinterpret_cast<const uint8_t *>(buf), len,
                     reinterpret_cast<uint8_t *>(out));
}

static inline size_t json_minify(const std::string_view &p, char *out) {
  return json_minify(p.data(), p.size(), out);
}

static inline size_t json_minify(const padded_string &p, char *out) {
  return json_minify(p.data(), p.size(), out);
}

} // namespace simdjson

#endif // SIMDJSON_JSONMINIFIER_H
@@ -29,7 +29,6 @@ set(SIMDJSON_SRC
set(SIMDJSON_SRC_HEADERS
  implementation.cpp
  isadetection.h
  jsonminifier.cpp
  simdprune_tables.h
  stage1_find_marks.cpp
  stage2_build_tape.cpp
@@ -48,7 +48,7 @@ really_inline int leading_zeroes(uint64_t input_num) {
}

/* result might be undefined when input_num is zero */
really_inline int hamming(uint64_t input_num) {
really_inline int count_ones(uint64_t input_num) {
  return vaddv_u8(vcnt_u8((uint8x8_t)input_num));
}
@@ -10,6 +10,7 @@ class implementation final : public simdjson::implementation {
public:
  really_inline implementation() : simdjson::implementation("arm64", "ARM NEON", instruction_set::NEON) {}
  WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final;
  WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
  WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept final;
  WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final;
  WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json) const noexcept final;
@@ -2,6 +2,8 @@
#define SIMDJSON_ARM64_SIMD_H

#include "simdjson.h"
#include "simdprune_tables.h"
#include "arm64/bitmanipulation.h"
#include "arm64/intrinsics.h"

namespace simdjson::arm64::simd {
@@ -142,6 +144,43 @@ namespace simdjson::arm64::simd {
  really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
    return lookup_table.apply_lookup_16_to(*this);
  }


  // Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
  // Passing a 0 value for mask would be equivalent to writing out every byte to output.
  // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes
  // get written.
  // Design consideration: it seems like a function with the
  // signature simd8<L> compress(uint16_t mask) would be
  // sensible, but the AVX ISA makes this kind of approach difficult.
  template<typename L>
  really_inline void compress(uint16_t mask, L * output) const {
    // this particular implementation was inspired by work done by @animetosho
    // we do it in two steps, first 8 bytes and then second 8 bytes
    uint8_t mask1 = static_cast<uint8_t>(mask); // least significant 8 bits
    uint8_t mask2 = static_cast<uint8_t>(mask >> 8); // most significant 8 bits
    // next line just loads the 64-bit values thintable_epi8[mask1] and
    // thintable_epi8[mask2] into a 128-bit register, using only
    // two instructions on most compilers.
    uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]};
    uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64);
    // we increment by 0x08 the second half of the mask
    uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08};
    shufmask = vaddq_u8(shufmask, inc);
    // this is the version "nearly pruned"
    uint8x16_t pruned = vqtbl1q_u8(*this, shufmask);
    // we still need to put the two halves together.
    // we compute the popcount of the first half:
    int pop1 = BitsSetTable256mul2[mask1];
    // then load the corresponding mask, what it does is to write
    // only the first pop1 bytes from the first 8 bytes, and then
    // it fills in with the bytes from the second 8 bytes + some filling
    // at the end.
    uint8x16_t compactmask = vld1q_u8((const uint8_t *)(pshufb_combine_table + pop1 * 8));
    uint8x16_t answer = vqtbl1q_u8(pruned, compactmask);
    vst1q_u8((uint8_t*) output, answer);
  }

  template<typename L>
  really_inline simd8<L> lookup_16(
      L replace0, L replace1, L replace2, L replace3,
@@ -267,6 +306,13 @@ namespace simdjson::arm64::simd {
    this->chunks[3].store(ptr+sizeof(simd8<T>)*3);
  }

  really_inline void compress(uint64_t mask, T * output) const {
    this->chunks[0].compress(mask, output);
    this->chunks[1].compress(mask >> 16, output + 16 - count_ones(mask & 0xFFFF));
    this->chunks[2].compress(mask >> 32, output + 32 - count_ones(mask & 0xFFFFFFFF));
    this->chunks[3].compress(mask >> 48, output + 48 - count_ones(mask & 0xFFFFFFFFFFFF));
  }

  template <typename F>
  static really_inline void each_index(F const& each) {
    each(0);

@@ -339,7 +385,6 @@ namespace simdjson::arm64::simd {
    const simd8<T> mask = simd8<T>::splat(m);
    return this->map( [&](auto a) { return a <= mask; } ).to_bitmask();
  }

}; // struct simd8x64<T>

} // namespace simdjson::arm64::simd
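The compress() overloads above are the heart of the SIMD minifier. Their contract, stated in the comments, reduces to the following scalar model (a reference sketch, not part of the commit); it also explains the `output + 16 - count_ones(...)` offsets: each chunk writes its survivors immediately after the previous chunk's survivors.

#include <cstddef>
#include <cstdint>

// Scalar model of simd8x64<T>::compress(mask, output): keep every byte whose
// mask bit is 0, drop every byte whose mask bit is 1, writing survivors
// contiguously. Returns the number of bytes kept, i.e. 64 - count_ones(mask).
static size_t compress_model(const uint8_t in[64], uint64_t mask, uint8_t *output) {
  size_t pos = 0;
  for (size_t i = 0; i < 64; i++) {
    if (((mask >> i) & 1) == 0) { output[pos++] = in[i]; }
  }
  return pos;
}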
@@ -31,6 +31,23 @@ really_inline json_character_block json_character_block::classify(const simd::si
    return shuf_lo & shuf_hi;
  });

  // We compute whitespace and op separately. If the code later only uses one or the
  // other, given the fact that all functions are aggressively inlined, we can
  // hope that useless computations will be omitted. This is namely the case when
  // minifying (we only need whitespace). *However* if we only need spaces,
  // it is likely that we will still compute 'v' above with two lookup_16: one
  // could do it a bit cheaper. This is in contrast with the x64 implementations
  // where we can, efficiently, do the white space and structural matching
  // separately. One reason for this difference is that on ARM NEON, the table
  // lookups either zero or leave unchanged the characters exceeding 0xF whereas
  // on x64, the equivalent instruction (pshufb) automatically applies a mask,
  // ignoring the 4 most significant bits. Thus the x64 implementation is
  // optimized differently. This being said, if you use this code strictly
  // just for minification (or just to identify the structural characters),
  // there is a small untaken optimization opportunity here. We deliberately
  // do not pick it up.

  uint64_t op = v.map([&](simd8<uint8_t> _v) { return _v.any_bits_set(0x7); }).to_bitmask();
  uint64_t whitespace = v.map([&](simd8<uint8_t> _v) { return _v.any_bits_set(0x18); }).to_bitmask();
  return { whitespace, op };
@@ -53,11 +70,17 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
  return is_second_byte ^ is_third_byte ^ is_fourth_byte;
}

#include "generic/utf8_lookup2_algorithm.h"
#include "generic/buf_block_reader.h"
#include "generic/json_string_scanner.h"
#include "generic/json_scanner.h"
#include "generic/json_structural_indexer.h"

#include "generic/json_minifier.h"
WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
  return arm64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
}

#include "generic/utf8_lookup2_algorithm.h"
#include "generic/json_structural_indexer.h"
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept {
  return arm64::stage1::json_structural_indexer::index<64>(buf, len, parser, streaming);
}
@@ -14,6 +14,7 @@ public:
    0
  ) {}
  WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final;
  WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
  WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept final;
  WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final;
  WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json) const noexcept final;
@@ -151,6 +151,62 @@ WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, do
  return scanner.scan();
}

// big table for the minifier
static uint8_t jump_table[256 * 3] = {
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
    1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
};

WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
  size_t i = 0, pos = 0;
  uint8_t quote = 0;
  uint8_t nonescape = 1;

  while (i < len) {
    unsigned char c = buf[i];
    uint8_t *meta = jump_table + 3 * c;

    quote = quote ^ (meta[0] & nonescape);
    dst[pos] = c;
    pos += meta[2] | quote;

    i += 1;
    nonescape = (~nonescape) | (meta[1]);
  }
  dst_len = pos; // we intentionally do not work with a reference
                 // for fear of aliasing
  return SUCCESS;
}

} // namespace simdjson::fallback

#endif // SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H
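The commit does not document jump_table; under my reading of the loop above, the three bytes at jump_table + 3*c encode, for each byte value c: meta[0] (c is a double quote, toggling the in-string state), meta[1] (c is not a backslash; a backslash masks the next quote toggle) and meta[2] (c is not JSON whitespace, so the byte is kept). With that reading, the branchless loop matches this straightforward reference (a hedged sketch, not the commit's code):

#include <cstddef>
#include <cstdint>

// Branchy reference for the fallback minifier: drop the four JSON whitespace
// bytes, except inside strings, honoring backslash escapes before quotes.
static size_t minify_reference(const uint8_t *buf, size_t len, uint8_t *dst) {
  size_t pos = 0;
  bool in_string = false, escaped = false;
  for (size_t i = 0; i < len; i++) {
    uint8_t c = buf[i];
    if (in_string) {
      dst[pos++] = c; // everything inside a string survives, whitespace included
      if (escaped) { escaped = false; }
      else if (c == '\\') { escaped = true; }
      else if (c == '"') { in_string = false; }
    } else if (c == '"') {
      in_string = true;
      dst[pos++] = c;
    } else if (c != ' ' && c != '\t' && c != '\n' && c != '\r') {
      dst[pos++] = c; // only JSON whitespace is removed
    }
  }
  return pos;
}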
@@ -0,0 +1,48 @@
// Walks through a buffer in block-sized increments, loading the last part with spaces
template<size_t STEP_SIZE>
struct buf_block_reader {
public:
  really_inline buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
  really_inline size_t block_index() { return idx; }
  really_inline bool has_full_block() const {
    return idx < lenminusstep;
  }
  really_inline const uint8_t *full_block() const {
    return &buf[idx];
  }
  really_inline bool has_remainder() const {
    return idx < len;
  }
  really_inline void get_remainder(uint8_t *tmp_buf) const {
    memset(tmp_buf, 0x20, STEP_SIZE);
    memcpy(tmp_buf, buf + idx, len - idx);
  }
  really_inline void advance() {
    idx += STEP_SIZE;
  }
private:
  const uint8_t *buf;
  const size_t len;
  const size_t lenminusstep;
  size_t idx;
};

// Routines to print masks and text for debugging bitmask operations
UNUSED static char * format_input_text(const simd8x64<uint8_t> in) {
  static char *buf = (char*)malloc(sizeof(simd8x64<uint8_t>) + 1);
  in.store((uint8_t*)buf);
  for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
    if (buf[i] < ' ') { buf[i] = '_'; }
  }
  buf[sizeof(simd8x64<uint8_t>)] = '\0';
  return buf;
}

UNUSED static char * format_mask(uint64_t mask) {
  static char *buf = (char*)malloc(64 + 1);
  for (size_t i=0; i<64; i++) {
    buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
  }
  buf[64] = '\0';
  return buf;
}
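buf_block_reader is always driven with the same two-phase loop; the json_minifier and json_structural_indexer below both use it. A minimal sketch of that driver (STEP_SIZE is 64 or 128 in this commit):

template<size_t STEP_SIZE>
void for_each_block(const uint8_t *buf, size_t len) {
  buf_block_reader<STEP_SIZE> reader(buf, len);
  while (reader.has_full_block()) {
    const uint8_t *block = reader.full_block();
    // ... process STEP_SIZE bytes at `block` ...
    (void)block;
    reader.advance();
  }
  if (reader.has_remainder()) {
    uint8_t block[STEP_SIZE];
    reader.get_remainder(block); // tail bytes, right-padded with 0x20 spaces
    // ... process the final padded block ...
  }
}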
@@ -0,0 +1,73 @@
// This file contains the common code every implementation uses in stage1
// It is intended to be included multiple times and compiled multiple times
// We assume the file in which it is included already includes
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)

namespace stage1 {

class json_minifier {
public:
  template<size_t STEP_SIZE>
  static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept;

private:
  really_inline json_minifier(uint8_t *_dst) : dst{_dst} {}
  template<size_t STEP_SIZE>
  really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
  really_inline void next(simd::simd8x64<uint8_t> in, json_block block);
  really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
  json_scanner scanner;
  uint8_t *dst;
};

really_inline void json_minifier::next(simd::simd8x64<uint8_t> in, json_block block) {
  uint64_t mask = block.whitespace();
  in.compress(mask, dst);
  dst += 64 - count_ones(mask);
}

really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) {
  *dst = '\0';
  error_code error = scanner.finish(false);
  if (error) { dst_len = 0; return error; }
  dst_len = dst - dst_start;
  return SUCCESS;
}

template<>
really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept {
  simd::simd8x64<uint8_t> in_1(block_buf);
  simd::simd8x64<uint8_t> in_2(block_buf+64);
  json_block block_1 = scanner.next(in_1);
  json_block block_2 = scanner.next(in_2);
  this->next(in_1, block_1);
  this->next(in_2, block_2);
  reader.advance();
}

template<>
really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept {
  simd::simd8x64<uint8_t> in_1(block_buf);
  json_block block_1 = scanner.next(in_1);
  this->next(block_buf, block_1);
  reader.advance();
}

template<size_t STEP_SIZE>
error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept {
  buf_block_reader<STEP_SIZE> reader(buf, len);
  json_minifier minifier(dst);
  while (reader.has_full_block()) {
    minifier.step<STEP_SIZE>(reader.full_block(), reader);
  }

  if (likely(reader.has_remainder())) {
    uint8_t block[STEP_SIZE];
    reader.get_remainder(block);
    minifier.step<STEP_SIZE>(block, reader);
  }

  return minifier.finish(dst, dst_len);
}

} // namespace stage1
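End to end, the generic minifier above gives the following observable behavior. A hedged usage sketch against this commit's public API (buffer sizing simplified; padded_string and active_implementation as they appear elsewhere in this diff):

#include "simdjson.h"
#include <cstdio>
#include <cstring>

int main() {
  const char *text = "{ \"greeting\" : \"hello world\" }";
  simdjson::padded_string json(text, std::strlen(text));
  // dst must be allocated up to len + SIMDJSON_PADDING bytes (see implementation.h);
  // 64 extra bytes is a comfortable margin for this sketch.
  char out[sizeof("{ \"greeting\" : \"hello world\" }") + 64];
  size_t out_len;
  auto error = simdjson::active_implementation->minify(
      (const uint8_t *)json.data(), json.size(), (uint8_t *)out, out_len);
  if (!error) {
    // whitespace outside strings is gone; the space in "hello world" survives:
    printf("%.*s\n", (int)out_len, out); // {"greeting":"hello world"}
  }
  return 0;
}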
@@ -5,23 +5,33 @@ namespace stage1 {
 */
struct json_block {
public:
  // the start of structurals that are not inside strings
  /** The start of structurals */
  really_inline uint64_t structural_start() { return potential_structural_start() & ~_string.string_tail(); }
  /** All JSON whitespace (i.e. not in a string) */
  really_inline uint64_t whitespace() { return non_quote_outside_string(_characters.whitespace()); }

  // operators plus scalar starts like 123, true and "abc"
  really_inline uint64_t potential_structural_start() { return _characters.op() | potential_scalar_start(); }
  // the start of non-operator runs, like 123, true and "abc"
  really_inline uint64_t potential_scalar_start() { return _characters.scalar() & ~follows_potential_scalar(); }
  // whether the given character is immediately after a non-operator like 123, true or "
  really_inline uint64_t follows_potential_scalar() { return _follows_potential_scalar; }
  // Return a mask of whether the given characters are inside a string (only works on non-quotes)
  // Helpers

  /** Whether the given characters are inside a string (only works on non-quotes) */
  really_inline uint64_t non_quote_inside_string(uint64_t mask) { return _string.non_quote_inside_string(mask); }
  /** Whether the given characters are outside a string (only works on non-quotes) */
  really_inline uint64_t non_quote_outside_string(uint64_t mask) { return _string.non_quote_outside_string(mask); }

  // string and escape characters
  json_string_block _string;
  // whitespace, operators, scalars
  json_character_block _characters;
  // whether the previous character was a scalar
  uint64_t _follows_potential_scalar;
private:
  // Potential structurals (i.e. disregarding strings)

  /** operators plus scalar starts like 123, true and "abc" */
  really_inline uint64_t potential_structural_start() { return _characters.op() | potential_scalar_start(); }
  /** the start of non-operator runs, like 123, true and "abc" */
  really_inline uint64_t potential_scalar_start() { return _characters.scalar() & ~follows_potential_scalar(); }
  /** whether the given character is immediately after a non-operator like 123, true or " */
  really_inline uint64_t follows_potential_scalar() { return _follows_potential_scalar; }
};

/**
@@ -14,7 +14,9 @@ struct json_string_block {
  // Only characters inside the string (not including the quotes)
  really_inline uint64_t string_content() const { return _in_string & ~_quote; }
  // Return a mask of whether the given characters are inside a string (only works on non-quotes)
  really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return _in_string & mask; }
  really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; }
  // Return a mask of whether the given characters are outside a string (only works on non-quotes)
  really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; }
  // Tail of string (everything except the start quote)
  really_inline uint64_t string_tail() const { return _in_string ^ _quote; }
@@ -22,7 +22,7 @@ public:
    // it helps tremendously.
    if (bits == 0)
      return;
    uint32_t cnt = hamming(bits);
    uint32_t cnt = count_ones(bits);

    // Do the first 8 all together
    for (int i=0; i<8; i++) {
@@ -55,55 +55,6 @@ public:
  }
};

// Routines to print masks and text for debugging bitmask operations
UNUSED static char * format_input_text(const simd8x64<uint8_t> in) {
  static char *buf = (char*)malloc(sizeof(simd8x64<uint8_t>) + 1);
  in.store((uint8_t*)buf);
  for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
    if (buf[i] < ' ') { buf[i] = '_'; }
  }
  buf[sizeof(simd8x64<uint8_t>)] = '\0';
  return buf;
}

UNUSED static char * format_mask(uint64_t mask) {
  static char *buf = (char*)malloc(64 + 1);
  for (size_t i=0; i<64; i++) {
    buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
  }
  buf[64] = '\0';
  return buf;
}

// Walks through a buffer in block-sized increments, loading the last part with spaces
template<size_t STEP_SIZE>
struct buf_block_reader {
public:
  really_inline buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
  really_inline size_t block_index() { return idx; }
  really_inline bool has_full_block() const {
    return idx < lenminusstep;
  }
  really_inline const uint8_t *full_block() const {
    return &buf[idx];
  }
  really_inline bool has_remainder() const {
    return idx < len;
  }
  really_inline void get_remainder(uint8_t *tmp_buf) const {
    memset(tmp_buf, 0x20, STEP_SIZE);
    memcpy(tmp_buf, buf + idx, len - idx);
  }
  really_inline void advance() {
    idx += STEP_SIZE;
  }
private:
  const uint8_t *buf;
  const size_t len;
  const size_t lenminusstep;
  size_t idx;
};

class json_structural_indexer {
public:
  template<size_t STEP_SIZE>
@@ -112,7 +63,7 @@ public:
private:
  really_inline json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {}
  template<size_t STEP_SIZE>
  really_inline void index_step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
  really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
  really_inline void next(simd::simd8x64<uint8_t> in, json_block block, size_t idx);
  really_inline error_code finish(document::parser &parser, size_t idx, size_t len, bool streaming);

@@ -162,7 +113,7 @@ really_inline error_code json_structural_indexer::finish(document::parser &parse
}

template<>
really_inline void json_structural_indexer::index_step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept {
really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept {
  simd::simd8x64<uint8_t> in_1(block);
  simd::simd8x64<uint8_t> in_2(block+64);
  json_block block_1 = scanner.next(in_1);

@@ -173,7 +124,7 @@ really_inline void json_structural_indexer::index_step<128>(const uint8_t *block
}

template<>
really_inline void json_structural_indexer::index_step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept {
really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept {
  simd::simd8x64<uint8_t> in_1(block);
  json_block block_1 = scanner.next(in_1);
  this->next(in_1, block_1, reader.block_index());

@@ -209,13 +160,13 @@ error_code json_structural_indexer::index(const uint8_t *buf, size_t len, docume
  buf_block_reader<STEP_SIZE> reader(buf, len);
  json_structural_indexer indexer(parser.structural_indexes.get());
  while (reader.has_full_block()) {
    indexer.index_step<STEP_SIZE>(reader.full_block(), reader);
    indexer.step<STEP_SIZE>(reader.full_block(), reader);
  }

  if (likely(reader.has_remainder())) {
    uint8_t block[STEP_SIZE];
    reader.get_remainder(block);
    indexer.index_step<STEP_SIZE>(block, reader);
    indexer.step<STEP_SIZE>(block, reader);
  }

  return indexer.finish(parser, reader.block_index(), len, streaming);
@@ -37,7 +37,7 @@ really_inline int leading_zeroes(uint64_t input_num) {
  return static_cast<int>(_lzcnt_u64(input_num));
}

really_inline int hamming(uint64_t input_num) {
really_inline int count_ones(uint64_t input_num) {
#ifdef _MSC_VER
  // note: we do not support legacy 32-bit Windows
  return __popcnt64(input_num);// Visual Studio wants two underscores
@@ -14,6 +14,7 @@ public:
    instruction_set::AVX2 | instruction_set::PCLMULQDQ | instruction_set::BMI1 | instruction_set::BMI2
  ) {}
  WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final;
  WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
  WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept final;
  WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final;
  WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json) const noexcept final;
@@ -2,6 +2,8 @@
#define SIMDJSON_HASWELL_SIMD_H

#include "simdjson.h"
#include "simdprune_tables.h"
#include "haswell/bitmanipulation.h"
#include "haswell/intrinsics.h"

TARGET_HASWELL
@@ -109,6 +111,57 @@ namespace simdjson::haswell::simd {
  really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
    return _mm256_shuffle_epi8(lookup_table, *this);
  }

  // Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
  // Passing a 0 value for mask would be equivalent to writing out every byte to output.
  // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
  // get written.
  // Design consideration: it seems like a function with the
  // signature simd8<L> compress(uint32_t mask) would be
  // sensible, but the AVX ISA makes this kind of approach difficult.
  template<typename L>
  really_inline void compress(uint32_t mask, L * output) const {
    // this particular implementation was inspired by work done by @animetosho
    // we do it in four steps, first 8 bytes and then second 8 bytes...
    uint8_t mask1 = static_cast<uint8_t>(mask); // least significant 8 bits
    uint8_t mask2 = static_cast<uint8_t>(mask >> 8); // second least significant 8 bits
    uint8_t mask3 = static_cast<uint8_t>(mask >> 16); // ...
    uint8_t mask4 = static_cast<uint8_t>(mask >> 24); // ...
    // next line just loads the 64-bit values thintable_epi8[mask1] through
    // thintable_epi8[mask4] into a 256-bit register.
    __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3],
                                         thintable_epi8[mask2], thintable_epi8[mask1]);
    // we increment by 0x08 the second half of the mask and so forth
    shufmask =
        _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818,
                                                   0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0));
    // this is the version "nearly pruned"
    __m256i pruned = _mm256_shuffle_epi8(*this, shufmask);
    // we still need to put the pieces back together.
    // we compute the popcount of the first words:
    int pop1 = BitsSetTable256mul2[mask1];
    int pop3 = BitsSetTable256mul2[mask3];

    // then load the corresponding mask
    // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic.
    __m256i v256 = _mm256_castsi128_si256(
        _mm_loadu_si128((const __m128i *)(pshufb_combine_table + pop1 * 8)));
    __m256i compactmask = _mm256_insertf128_si256(v256,
        _mm_loadu_si128((const __m128i *)(pshufb_combine_table + pop3 * 8)), 1);
    __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask);
    // We just need to write out the result.
    // This is the tricky bit that is hard to do
    // if we want to return a SIMD register, since there
    // is no single-instruction approach to recombine
    // the two 128-bit lanes with an offset.
    __m128i v128;
    v128 = _mm256_castsi256_si128(almostthere);
    _mm_storeu_si128( (__m128i *)output, v128);
    v128 = _mm256_extractf128_si256(almostthere, 1);
    _mm_storeu_si128( (__m128i *)(output + 16 - count_ones(mask & 0xFFFF)), v128);
  }

  template<typename L>
  really_inline simd8<L> lookup_16(
      L replace0, L replace1, L replace2, L replace3,
@@ -249,6 +302,13 @@ namespace simdjson::haswell::simd {
    each(1);
  }

  really_inline void compress(uint64_t mask, T * output) const {
    uint32_t mask1 = static_cast<uint32_t>(mask);
    uint32_t mask2 = static_cast<uint32_t>(mask >> 32);
    this->chunks[0].compress(mask1, output);
    this->chunks[1].compress(mask2, output + 32 - count_ones(mask1));
  }

  really_inline void store(T ptr[64]) const {
    this->chunks[0].store(ptr+sizeof(simd8<T>)*0);
    this->chunks[1].store(ptr+sizeof(simd8<T>)*1);

@@ -269,6 +329,8 @@ namespace simdjson::haswell::simd {
    );
  }


  template <typename R=bool, typename F>
  really_inline simd8x64<R> map(const simd8x64<uint8_t> b, F const& map_chunk) const {
    return simd8x64<R>(

@@ -302,7 +364,6 @@ namespace simdjson::haswell::simd {
    const simd8<T> mask = simd8<T>::splat(m);
    return this->map( [&](auto a) { return a <= mask; } ).to_bitmask();
  }

}; // struct simd8x64<T>

} // namespace simdjson::haswell::simd
@@ -30,6 +30,11 @@ really_inline json_character_block json_character_block::classify(const simd::si
  auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
  auto op_table = simd8<uint8_t>::repeat_16(',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{');

  // We compute whitespace and op separately. If the code later only uses one or the
  // other, given the fact that all functions are aggressively inlined, we can
  // hope that useless computations will be omitted. This is namely the case when
  // minifying (we only need whitespace).

  uint64_t whitespace = in.map([&](simd8<uint8_t> _in) {
    return _in == simd8<uint8_t>(_mm256_shuffle_epi8(whitespace_table, _in));
  }).to_bitmask();
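The whitespace_table above relies on a classic pshufb trick: the shuffle indexes the 16-entry table by each byte's low nibble, and the entries are chosen so that table[c & 0xF] == c holds exactly for the four JSON whitespace bytes. A scalar model (my illustration, not part of the commit):

#include <cstdint>

// table[c & 0xF] == c exactly when c is ' ' (0x20), '\t' (0x09), '\n' (0x0A)
// or '\r' (0x0D); the filler values (100, 17, 113, 2, 112) never collide with
// any byte sharing their low nibble. pshufb also zeroes lanes whose high bit
// is set, which cannot create a false match since every entry is below 0x80.
static bool is_json_whitespace_model(uint8_t c) {
  static const uint8_t table[16] = {' ', 100,  100,  100, 17,  100,  113,  2,
                                    100, '\t', '\n', 112, 100, '\r', 100, 100};
  return table[c & 0xF] == c;
}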
@@ -54,11 +59,17 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
  return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
}

#include "generic/utf8_lookup2_algorithm.h"
#include "generic/buf_block_reader.h"
#include "generic/json_string_scanner.h"
#include "generic/json_scanner.h"
#include "generic/json_structural_indexer.h"

#include "generic/json_minifier.h"
WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
  return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len);
}

#include "generic/utf8_lookup2_algorithm.h"
#include "generic/json_structural_indexer.h"
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept {
  return haswell::stage1::json_structural_indexer::index<128>(buf, len, parser, streaming);
}
@@ -1,5 +1,7 @@
#include "simdjson.h"
#include "isadetection.h"
#include "simdprune_tables.h"

#include <initializer_list>

// Static array of known implementations. We're hoping these get baked into the executable

@@ -48,6 +50,9 @@ public:
  WARN_UNUSED error_code parse(const uint8_t *, size_t, document::parser &) const noexcept final {
    return UNSUPPORTED_ARCHITECTURE;
  }
  WARN_UNUSED error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final {
    return UNSUPPORTED_ARCHITECTURE;
  }
  WARN_UNUSED error_code stage1(const uint8_t *, size_t, document::parser &, bool) const noexcept final {
    return UNSUPPORTED_ARCHITECTURE;
  }
@ -1,478 +0,0 @@
|
|||
#include "simdjson.h"
|
||||
#include <cstdint>
|
||||
|
||||
#ifndef SIMDJSON_ISSUE384RESOLVED // to avoid tripping users
|
||||
|
||||
namespace simdjson {
|
||||
static uint8_t jump_table[256 * 3] = {
|
||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
||||
1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1,
|
||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
|
||||
1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
||||
1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
||||
};
|
||||
|
||||
size_t json_minify(const unsigned char *bytes, size_t how_many,
|
||||
unsigned char *out) {
|
||||
size_t i = 0, pos = 0;
|
||||
uint8_t quote = 0;
|
||||
uint8_t nonescape = 1;
|
||||
|
||||
while (i < how_many) {
|
||||
unsigned char c = bytes[i];
|
||||
uint8_t *meta = jump_table + 3 * c;
|
||||
|
||||
quote = quote ^ (meta[0] & nonescape);
|
||||
out[pos] = c;
|
||||
pos += meta[2] | quote;
|
||||
|
||||
i += 1;
|
||||
nonescape = (~nonescape) | (meta[1]);
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
} // namespace simdjson
|
||||
#else
|
||||
|
||||
//
|
||||
// This fast code is disabled.
|
||||
// See issue https://github.com/lemire/simdjson/issues/384
|
||||
//
|
||||
#include "simdprune_tables.h"
|
||||
#include <cstring>
|
||||
#include <x86intrin.h> // currently, there is no runtime dispatch for the minifier
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// a straightforward comparison of a mask against input.
|
||||
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
|
||||
__m256i mask) {
|
||||
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
|
||||
uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
|
||||
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
|
||||
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
|
||||
return res_0 | (res_1 << 32);
|
||||
}
|
||||
|
||||
// Write up to 16 bytes, only the bytes corresponding to a 1-bit are written
|
||||
// out. credit: Anime Tosho
|
||||
static __m128i skinnycleanm128(__m128i x, int mask) {
|
||||
int mask1 = mask & 0xFF;
|
||||
int mask2 = (mask >> 8) & 0xFF;
|
||||
__m128i shufmask = _mm_castps_si128(
|
||||
_mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64(
|
||||
(const __m128i *)(thintable_epi8 + mask1))),
|
||||
(const __m64 *)(thintable_epi8 + mask2)));
|
||||
shufmask =
|
||||
_mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0));
|
||||
__m128i pruned = _mm_shuffle_epi8(x, shufmask);
|
||||
intptr_t popx2 = BitsSetTable256mul2[mask1];
|
||||
__m128i compactmask =
|
||||
_mm_loadu_si128((const __m128i *)(pshufb_combine_table + popx2 * 8));
|
||||
return _mm_shuffle_epi8(pruned, compactmask);
|
||||
}
|
||||
|
||||
// take input from buf and remove useless whitespace, input and output can be
|
||||
// the same, result is null terminated, return the string length (minus the null
|
||||
// termination)
|
||||
size_t json_minify(const uint8_t *buf, size_t len, uint8_t *out) {
|
||||
// Useful constant masks
|
||||
const uint64_t even_bits = 0x5555555555555555ULL;
|
||||
const uint64_t odd_bits = ~even_bits;
|
||||
uint8_t *initout(out);
|
||||
uint64_t prev_iter_ends_odd_backslash =
|
||||
0ULL; // either 0 or 1, but a 64-bit value
|
||||
uint64_t prev_iter_inside_quote = 0ULL; // either all zeros or all ones
|
||||
size_t idx = 0;
|
||||
if (len >= 64) {
|
||||
size_t avx_len = len - 63;
|
||||
|
||||
for (; idx < avx_len; idx += 64) {
|
||||
__m256i input_lo =
|
||||
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
|
||||
__m256i input_hi =
|
||||
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
|
||||
uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
|
||||
_mm256_set1_epi8('\\'));
|
||||
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
|
||||
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
|
||||
uint64_t even_starts = start_edges & even_start_mask;
|
||||
uint64_t odd_starts = start_edges & ~even_start_mask;
|
||||
uint64_t even_carries = bs_bits + even_starts;
|
||||
uint64_t odd_carries;
|
||||
bool iter_ends_odd_backslash =
|
||||
add_overflow(bs_bits, odd_starts, &odd_carries);
|
||||
odd_carries |= prev_iter_ends_odd_backslash;
|
||||
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
|
||||
uint64_t even_carry_ends = even_carries & ~bs_bits;
|
||||
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
|
||||
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
|
||||
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
|
||||
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
|
||||
uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi,
|
||||
_mm256_set1_epi8('"'));
|
||||
quote_bits = quote_bits & ~odd_ends;
|
||||
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
||||
quote_mask ^= prev_iter_inside_quote;
|
||||
prev_iter_inside_quote = static_cast<uint64_t>(
|
||||
static_cast<int64_t>(quote_mask) >>
|
||||
63); // might be undefined behavior, should be fully defined in C++20,
|
||||
// ok according to John Regher from Utah University
|
||||
const __m256i low_nibble_mask = _mm256_setr_epi8(
|
||||
// 0 9 a b c d
|
||||
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
|
||||
const __m256i high_nibble_mask = _mm256_setr_epi8(
|
||||
// 0 2 3 5 7
|
||||
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
|
||||
1, 0, 0, 0, 3, 2, 1, 0, 0);
|
||||
__m256i whitespace_shufti_mask = _mm256_set1_epi8(0x18);
|
||||
__m256i v_lo = _mm256_and_si256(
|
||||
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
|
||||
_mm256_shuffle_epi8(high_nibble_mask,
|
||||
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
|
||||
_mm256_set1_epi8(0x7f))));
|
||||
|
||||
__m256i v_hi = _mm256_and_si256(
|
||||
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
|
||||
_mm256_shuffle_epi8(high_nibble_mask,
|
||||
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
|
||||
_mm256_set1_epi8(0x7f))));
|
||||
__m256i tmp_ws_lo = _mm256_cmpeq_epi8(
|
||||
_mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
||||
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
|
||||
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
||||
|
||||
uint64_t ws_res_0 =
|
||||
static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
|
||||
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
||||
whitespace &= ~quote_mask;
|
||||
|
||||
uint64_t non_whitespace = ~whitespace;
|
||||
|
||||
__m128i x1 = _mm256_extracti128_si256(input_lo, 0);
|
||||
__m128i x2 = _mm256_extracti128_si256(input_lo, 1);
|
||||
__m128i x3 = _mm256_extracti128_si256(input_hi, 0);
|
||||
__m128i x4 = _mm256_extracti128_si256(input_hi, 1);
|
||||
|
||||
int mask1 = non_whitespace & 0xFFFF;
|
||||
int mask2 = (non_whitespace >> 16) & 0xFFFF;
|
||||
int mask3 = (non_whitespace >> 32) & 0xFFFF;
|
||||
int mask4 = (non_whitespace >> 48) & 0xFFFF;
|
||||
|
||||
x1 = skinnycleanm128(x1, mask1);
|
||||
x2 = skinnycleanm128(x2, mask2);
|
||||
x3 = skinnycleanm128(x3, mask3);
|
||||
x4 = skinnycleanm128(x4, mask4);
|
||||
int pop1 = hamming(non_whitespace & 0xFFFF);
|
||||
int pop2 = hamming(non_whitespace & UINT64_C(0xFFFFFFFF));
|
||||
int pop3 = hamming(non_whitespace & UINT64_C(0xFFFFFFFFFFFF));
|
||||
int pop4 = hamming(non_whitespace);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(out), x1);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(out + pop1), x2);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(out + pop2), x3);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(out + pop3), x4);
|
||||
out += pop4;
|
||||
}
|
||||
}
|
||||
  // we finish off the job... copying and pasting the code is not ideal here,
  // but it gets the job done.
  if (idx < len) {
    uint8_t buffer[64];
    memset(buffer, 0, 64);
    memcpy(buffer, buf + idx, len - idx);
    __m256i input_lo =
        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer));
    __m256i input_hi =
        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer + 32));
    uint64_t bs_bits =
        cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
    uint64_t start_edges = bs_bits & ~(bs_bits << 1);
    uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
    uint64_t even_starts = start_edges & even_start_mask;
    uint64_t odd_starts = start_edges & ~even_start_mask;
    uint64_t even_carries = bs_bits + even_starts;
    uint64_t odd_carries;
    // bool iter_ends_odd_backslash =
    add_overflow(bs_bits, odd_starts, &odd_carries);
    odd_carries |= prev_iter_ends_odd_backslash;
    // prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
    // we never use it on the final partial block
    uint64_t even_carry_ends = even_carries & ~bs_bits;
    uint64_t odd_carry_ends = odd_carries & ~bs_bits;
    uint64_t even_start_odd_end = even_carry_ends & odd_bits;
    uint64_t odd_start_even_end = odd_carry_ends & even_bits;
    uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
    uint64_t quote_bits =
        cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('"'));
    quote_bits = quote_bits & ~odd_ends;
    uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
        _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
    quote_mask ^= prev_iter_inside_quote;
    // prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
    // we don't need it anymore on the final partial block

    __m256i mask_20 = _mm256_set1_epi8(0x20); // c == 32
    __m256i mask_70 =
        _mm256_set1_epi8(0x70); // adding 0x70 does not check the low 4 bits
    // but moves any value >= 16 above 128

    __m256i lut_cntrl = _mm256_setr_epi8(
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00,
        0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00);

    __m256i tmp_ws_lo = _mm256_or_si256(
        _mm256_cmpeq_epi8(mask_20, input_lo),
        _mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_lo)));
    __m256i tmp_ws_hi = _mm256_or_si256(
        _mm256_cmpeq_epi8(mask_20, input_hi),
        _mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
    uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
    uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
    uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
    whitespace &= ~quote_mask;

    if (len - idx < 64) {
      whitespace |= UINT64_C(0xFFFFFFFFFFFFFFFF) << (len - idx);
    }
    uint64_t non_whitespace = ~whitespace;

    __m128i x1 = _mm256_extracti128_si256(input_lo, 0);
    __m128i x2 = _mm256_extracti128_si256(input_lo, 1);
    __m128i x3 = _mm256_extracti128_si256(input_hi, 0);
    __m128i x4 = _mm256_extracti128_si256(input_hi, 1);

    int mask1 = non_whitespace & 0xFFFF;
    int mask2 = (non_whitespace >> 16) & 0xFFFF;
    int mask3 = (non_whitespace >> 32) & 0xFFFF;
    int mask4 = (non_whitespace >> 48) & 0xFFFF;

    x1 = skinnycleanm128(x1, mask1);
    x2 = skinnycleanm128(x2, mask2);
    x3 = skinnycleanm128(x3, mask3);
    x4 = skinnycleanm128(x4, mask4);
    int pop1 = hamming(non_whitespace & 0xFFFF);
    int pop2 = hamming(non_whitespace & UINT64_C(0xFFFFFFFF));
    int pop3 = hamming(non_whitespace & UINT64_C(0xFFFFFFFFFFFF));
    int pop4 = hamming(non_whitespace);
    // full 16-byte stores: the output buffer must have enough slack
    _mm_storeu_si128(reinterpret_cast<__m128i *>(out), x1);
    _mm_storeu_si128(reinterpret_cast<__m128i *>(out + pop1), x2);
    _mm_storeu_si128(reinterpret_cast<__m128i *>(out + pop2), x3);
    _mm_storeu_si128(reinterpret_cast<__m128i *>(out + pop3), x4);
    out += pop4;
  }
  *out = '\0'; // NUL termination
  return out - initout;
}
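
An aside on the quote-mask step used in both tails above: carry-lessly multiplying the quote bitset by an all-ones 64-bit constant computes a prefix XOR, so bit i of quote_mask is the XOR of all quote bits at positions 0 through i, which is 1 exactly when position i lies inside a quoted region. A minimal scalar sketch of the same computation (a hypothetical helper for illustration, not code from this commit):

#include <cstdint>

// Scalar equivalent of _mm_clmulepi64_si128(quote_bits, all-ones, 0):
// bit i of the result is the XOR of quote bits 0..i (a running XOR).
static inline uint64_t prefix_xor_scalar(uint64_t quote_bits) {
  uint64_t mask = quote_bits;
  mask ^= mask << 1;
  mask ^= mask << 2;
  mask ^= mask << 4;
  mask ^= mask << 8;
  mask ^= mask << 16;
  mask ^= mask << 32;
  return mask;
}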
size_t oldjson_minify(const uint8_t *buf, size_t len, uint8_t *out) {
  // Useful constant masks
  const uint64_t even_bits = 0x5555555555555555ULL;
  const uint64_t odd_bits = ~even_bits;
  uint8_t *initout(out);
  uint64_t prev_iter_ends_odd_backslash =
      0ULL;                               // either 0 or 1, but a 64-bit value
  uint64_t prev_iter_inside_quote = 0ULL; // either all zeros or all ones
  size_t idx = 0;
  if (len >= 64) {
    size_t avx_len = len - 63;

    for (; idx < avx_len; idx += 64) {
      __m256i input_lo =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
      __m256i input_hi =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
      uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
                                                     _mm256_set1_epi8('\\'));
      uint64_t start_edges = bs_bits & ~(bs_bits << 1);
      uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
      uint64_t even_starts = start_edges & even_start_mask;
      uint64_t odd_starts = start_edges & ~even_start_mask;
      uint64_t even_carries = bs_bits + even_starts;
      uint64_t odd_carries;
      bool iter_ends_odd_backslash =
          add_overflow(bs_bits, odd_starts, &odd_carries);
      odd_carries |= prev_iter_ends_odd_backslash;
      prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
      uint64_t even_carry_ends = even_carries & ~bs_bits;
      uint64_t odd_carry_ends = odd_carries & ~bs_bits;
      uint64_t even_start_odd_end = even_carry_ends & odd_bits;
      uint64_t odd_start_even_end = odd_carry_ends & even_bits;
      uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
      uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi,
                                                        _mm256_set1_epi8('"'));
      quote_bits = quote_bits & ~odd_ends;
      uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
          _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
      quote_mask ^= prev_iter_inside_quote;
      prev_iter_inside_quote = static_cast<uint64_t>(
          static_cast<int64_t>(quote_mask) >>
          63); // implementation-defined before C++20, fully defined in C++20,
               // ok according to John Regehr from the University of Utah
      const __m256i low_nibble_mask = _mm256_setr_epi8(
          //  0                          9   a  b  c  d
          16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
          0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
      const __m256i high_nibble_mask = _mm256_setr_epi8(
          // 0     2  3     5     7
          8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
          1, 0, 0, 0, 3, 2, 1, 0, 0);
      __m256i whitespace_shufti_mask = _mm256_set1_epi8(0x18);
      __m256i v_lo = _mm256_and_si256(
          _mm256_shuffle_epi8(low_nibble_mask, input_lo),
          _mm256_shuffle_epi8(high_nibble_mask,
                              _mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
                                               _mm256_set1_epi8(0x7f))));

      __m256i v_hi = _mm256_and_si256(
          _mm256_shuffle_epi8(low_nibble_mask, input_hi),
          _mm256_shuffle_epi8(high_nibble_mask,
                              _mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
                                               _mm256_set1_epi8(0x7f))));
      __m256i tmp_ws_lo = _mm256_cmpeq_epi8(
          _mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
      __m256i tmp_ws_hi = _mm256_cmpeq_epi8(
          _mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));

      uint64_t ws_res_0 =
          static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
      uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
      uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
      whitespace &= ~quote_mask;
      int mask1 = whitespace & 0xFFFF;
      int mask2 = (whitespace >> 16) & 0xFFFF;
      int mask3 = (whitespace >> 32) & 0xFFFF;
      int mask4 = (whitespace >> 48) & 0xFFFF;
      int pop1 = hamming((~whitespace) & 0xFFFF);
      int pop2 = hamming((~whitespace) & UINT64_C(0xFFFFFFFF));
      int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
      int pop4 = hamming((~whitespace));
      __m128i x1 = _mm256_extracti128_si256(input_lo, 0);
      __m128i x2 = _mm256_extracti128_si256(input_lo, 1);
      __m128i x3 = _mm256_extracti128_si256(input_hi, 0);
      __m128i x4 = _mm256_extracti128_si256(input_hi, 1);
      x1 = skinnycleanm128(x1, mask1);
      x2 = skinnycleanm128(x2, mask2);
      x3 = skinnycleanm128(x3, mask3);
      x4 = skinnycleanm128(x4, mask4);
      _mm_storeu_si128(reinterpret_cast<__m128i *>(out), x1);
      _mm_storeu_si128(reinterpret_cast<__m128i *>(out + pop1), x2);
      _mm_storeu_si128(reinterpret_cast<__m128i *>(out + pop2), x3);
      _mm_storeu_si128(reinterpret_cast<__m128i *>(out + pop3), x4);
      out += pop4;
    }
  }
  // we finish off the job... copying and pasting the code is not ideal here,
  // but it gets the job done.
  if (idx < len) {
    uint8_t buffer[64];
    memset(buffer, 0, 64);
    memcpy(buffer, buf + idx, len - idx);
    __m256i input_lo =
        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer));
    __m256i input_hi =
        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer + 32));
    uint64_t bs_bits =
        cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
    uint64_t start_edges = bs_bits & ~(bs_bits << 1);
    uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
    uint64_t even_starts = start_edges & even_start_mask;
    uint64_t odd_starts = start_edges & ~even_start_mask;
    uint64_t even_carries = bs_bits + even_starts;
    uint64_t odd_carries;
    // bool iter_ends_odd_backslash =
    add_overflow(bs_bits, odd_starts, &odd_carries);
    odd_carries |= prev_iter_ends_odd_backslash;
    // prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
    // we never use it
    uint64_t even_carry_ends = even_carries & ~bs_bits;
    uint64_t odd_carry_ends = odd_carries & ~bs_bits;
    uint64_t even_start_odd_end = even_carry_ends & odd_bits;
    uint64_t odd_start_even_end = odd_carry_ends & even_bits;
    uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
    uint64_t quote_bits =
        cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('"'));
    quote_bits = quote_bits & ~odd_ends;
    uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
        _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
    quote_mask ^= prev_iter_inside_quote;
    // prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
    // we don't need it anymore

    __m256i mask_20 = _mm256_set1_epi8(0x20); // c == 32
    __m256i mask_70 =
        _mm256_set1_epi8(0x70); // adding 0x70 does not check the low 4 bits
    // but moves any value >= 16 above 128

    __m256i lut_cntrl = _mm256_setr_epi8(
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00,
        0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00);

    __m256i tmp_ws_lo = _mm256_or_si256(
        _mm256_cmpeq_epi8(mask_20, input_lo),
        _mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_lo)));
    __m256i tmp_ws_hi = _mm256_or_si256(
        _mm256_cmpeq_epi8(mask_20, input_hi),
        _mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
    uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
    uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
    uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
    whitespace &= ~quote_mask;

    if (len - idx < 64) {
      whitespace |= UINT64_C(0xFFFFFFFFFFFFFFFF) << (len - idx);
    }
    int mask1 = whitespace & 0xFFFF;
    int mask2 = (whitespace >> 16) & 0xFFFF;
    int mask3 = (whitespace >> 32) & 0xFFFF;
    int mask4 = (whitespace >> 48) & 0xFFFF;
    int pop1 = hamming((~whitespace) & 0xFFFF);
    int pop2 = hamming((~whitespace) & UINT64_C(0xFFFFFFFF));
    int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
    int pop4 = hamming((~whitespace));
    __m128i x1 = _mm256_extracti128_si256(input_lo, 0);
    __m128i x2 = _mm256_extracti128_si256(input_lo, 1);
    __m128i x3 = _mm256_extracti128_si256(input_hi, 0);
    __m128i x4 = _mm256_extracti128_si256(input_hi, 1);
    x1 = skinnycleanm128(x1, mask1);
    x2 = skinnycleanm128(x2, mask2);
    x3 = skinnycleanm128(x3, mask3);
    x4 = skinnycleanm128(x4, mask4);
    _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer), x1);
    _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer + pop1), x2);
    _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer + pop2), x3);
    _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer + pop3), x4);
    memcpy(out, buffer, pop4);
    out += pop4;
  }
  *out = '\0'; // NUL termination
  return out - initout;
}
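
A quick driver for oldjson_minify (a hypothetical usage sketch, assuming the function is visible to the caller; it is not part of this commit). The minified text is never longer than the input, and the tail block stages its stores through a local buffer, so len + 1 output bytes suffice here:

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

int main() {
  const char *json = "{ \"a b\" : [ 1, 2, 3 ] }";
  size_t len = strlen(json);
  std::vector<uint8_t> out(len + 1); // + 1 for the terminating NUL
  size_t n = simdjson::oldjson_minify(
      reinterpret_cast<const uint8_t *>(json), len, out.data());
  printf("%zu bytes: %s\n", n, out.data()); // {"a b":[1,2,3]} -- the space
                                            // inside the string is preserved
}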

} // namespace simdjson
#endif
@@ -1,5 +1,4 @@
#include "simdjson.h"
#include "implementation.cpp"
#include "jsonminifier.cpp"
#include "stage1_find_marks.cpp"
#include "stage2_build_tape.cpp"
@@ -46,7 +46,7 @@ really_inline int leading_zeroes(uint64_t input_num) {
#endif // _MSC_VER
}

really_inline int hamming(uint64_t input_num) {
really_inline int count_ones(uint64_t input_num) {
#ifdef _MSC_VER
  // note: we do not support legacy 32-bit Windows
  return __popcnt64(input_num); // Visual Studio wants two underscores
@@ -11,6 +11,7 @@ class implementation final : public simdjson::implementation {
public:
  really_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", instruction_set::SSE42 | instruction_set::PCLMULQDQ) {}
  WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final;
  WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
  WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept final;
  WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final;
  WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json) const noexcept final;
@@ -2,8 +2,12 @@
#define SIMDJSON_WESTMERE_SIMD_H

#include "simdjson.h"
#include "simdprune_tables.h"
#include "westmere/bitmanipulation.h"
#include "westmere/intrinsics.h"

TARGET_WESTMERE
namespace simdjson::westmere::simd {
@@ -106,6 +110,42 @@ namespace simdjson::westmere::simd {
    really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
      return _mm_shuffle_epi8(lookup_table, *this);
    }

    // Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
    // Passing a 0 value for mask would be equivalent to writing out every byte to output.
    // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes
    // get written.
    // Design consideration: it seems like a function with the
    // signature simd8<L> compress(uint32_t mask) would be
    // sensible, but the AVX ISA makes this kind of approach difficult.
    template<typename L>
    really_inline void compress(uint16_t mask, L * output) const {
      // this particular implementation was inspired by work done by @animetosho
      // we do it in two steps, first 8 bytes and then second 8 bytes
      uint8_t mask1 = static_cast<uint8_t>(mask);      // least significant 8 bits
      uint8_t mask2 = static_cast<uint8_t>(mask >> 8); // most significant 8 bits
      // the next line loads the 64-bit values thintable_epi8[mask1] and
      // thintable_epi8[mask2] into a 128-bit register, using only
      // two instructions on most compilers.
      __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]);
      // we increment by 0x08 the second half of the mask
      shufmask =
          _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0));
      // this is the "nearly pruned" version
      __m128i pruned = _mm_shuffle_epi8(*this, shufmask);
      // we still need to put the two halves together.
      // we compute the popcount of the first half:
      int pop1 = BitsSetTable256mul2[mask1];
      // then load the corresponding mask: it writes
      // only the first pop1 bytes from the first 8 bytes, and then
      // fills in with the bytes from the second 8 bytes, plus some filler
      // at the end.
      __m128i compactmask =
          _mm_loadu_si128((const __m128i *)(pshufb_combine_table + pop1 * 8));
      __m128i answer = _mm_shuffle_epi8(pruned, compactmask);
      _mm_storeu_si128((__m128i *)(output), answer);
    }
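
For intuition, here is a scalar model of what compress computes (my sketch, not code from this commit): bytes whose mask bit is 1 are dropped, survivors are packed to the front, and everything past the survivors is insignificant filler.

#include <cstddef>
#include <cstdint>

// Scalar model of simd8<L>::compress(mask, output): keep bytes whose mask
// bit is 0, in order; the last count_ones(mask) output bytes are filler.
void compress_model(const uint8_t in[16], uint16_t mask, uint8_t out[16]) {
  size_t pos = 0;
  for (size_t i = 0; i < 16; i++) {
    if (!((mask >> i) & 1)) { out[pos++] = in[i]; }
  }
  // out[pos..15] is written by the SIMD version but carries no meaning
}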

    template<typename L>
    really_inline simd8<L> lookup_16(
        L replace0, L replace1, L replace2, L replace3,
@@ -235,6 +275,13 @@ namespace simdjson::westmere::simd {
      this->chunks[3].store(ptr+sizeof(simd8<T>)*3);
    }

    really_inline void compress(uint64_t mask, T * output) const {
      this->chunks[0].compress(mask, output);
      this->chunks[1].compress(mask >> 16, output + 16 - count_ones(mask & 0xFFFF));
      this->chunks[2].compress(mask >> 32, output + 32 - count_ones(mask & 0xFFFFFFFF));
      this->chunks[3].compress(mask >> 48, output + 48 - count_ones(mask & 0xFFFFFFFFFFFF));
    }
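
The offsets are what make this work: each chunk's survivors must land immediately after the previous chunk's, and 16 - count_ones(mask & 0xFFFF) is precisely how many bytes chunk 0 kept. A 64-byte scalar model (my sketch, not part of the commit):

#include <cstddef>
#include <cstdint>

// Scalar model of simd8x64<T>::compress: drop bytes whose mask bit is 1.
size_t compress64_model(const uint8_t in[64], uint64_t mask, uint8_t *out) {
  size_t pos = 0;
  for (size_t i = 0; i < 64; i++) {
    if (!((mask >> i) & 1)) { out[pos++] = in[i]; }
  }
  return pos; // == 64 - count_ones(mask)
}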

    template <typename F>
    static really_inline void each_index(F const& each) {
      each(0);
@@ -302,7 +349,6 @@ namespace simdjson::westmere::simd {
      const simd8<T> mask = simd8<T>::splat(m);
      return this->map( [&](auto a) { return a <= mask; } ).to_bitmask();
    }

  }; // struct simd8x64<T>

} // namespace simdjson::westmere::simd
@@ -29,6 +29,11 @@ really_inline json_character_block json_character_block::classify(const simd::si
  auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
  auto op_table = simd8<uint8_t>::repeat_16(',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{');

  // We compute whitespace and op separately. If later code uses only one or the
  // other, then, since all of these functions are aggressively inlined, we can
  // hope that the useless computation will be optimized away. This is notably
  // the case when minifying (we only need whitespace).

  uint64_t whitespace = in.map([&](simd8<uint8_t> _in) {
    return _in == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, _in));
  }).to_bitmask();
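
The whitespace_table deserves a gloss: a byte c is classified as whitespace when the table entry selected by its low nibble equals c itself, and the filler values (100, 17, 113, 2, 112) are chosen so that no input byte can ever collide with them. A scalar model, assuming pshufb semantics (my sketch, not code from this commit):

#include <cstdint>

// Scalar model of the whitespace_table lookup. _mm_shuffle_epi8 returns 0
// when the index byte has its high bit set, otherwise table[c & 0xF].
bool is_json_whitespace_model(uint8_t c) {
  static const uint8_t table[16] = {' ', 100, 100, 100, 17, 100, 113, 2,
                                    100, '\t', '\n', 112, 100, '\r', 100, 100};
  if (c & 0x80) { return false; } // pshufb yields 0, which cannot equal c
  return table[c & 0xF] == c;     // true only for ' ', '\t', '\n', '\r'
}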
@@ -53,11 +58,17 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
  return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
}

#include "generic/utf8_lookup2_algorithm.h"
#include "generic/buf_block_reader.h"
#include "generic/json_string_scanner.h"
#include "generic/json_scanner.h"
#include "generic/json_structural_indexer.h"

#include "generic/json_minifier.h"
WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
  return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
}

#include "generic/utf8_lookup2_algorithm.h"
#include "generic/json_structural_indexer.h"
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept {
  return westmere::stage1::json_structural_indexer::index<64>(buf, len, parser, streaming);
}
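
Callers are expected to reach this per-architecture minify through the dispatch pointer rather than by naming westmere directly. A hedged sketch of the calling pattern (it mirrors the minify tool further down; the slack added to the destination buffer is my assumption, not something this commit specifies):

#include <cstdint>
#include <vector>
#include "simdjson.h"

std::vector<uint8_t> minify_bytes(const uint8_t *buf, size_t len) {
  std::vector<uint8_t> dst(len + 64); // minified text is at most len bytes
  size_t dst_len = 0;
  auto error = simdjson::active_implementation->minify(buf, len, dst.data(), dst_len);
  dst.resize(error ? 0 : dst_len);
  return dst;
}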
@@ -892,7 +892,8 @@ namespace dom_api {
  if (doc["a"].as_uint64_t().first != 1) { cerr << "Expected uint64_t(doc[\"a\"]) to be 1, was " << doc["a"].first << endl; return false; }

  UNUSED document::element val;
  tie(val, error) = doc["d"];
  // tie(val, error) = doc["d"]; fails with "no viable overloaded '='" on Apple clang version 11.0.0
  doc["d"].tie(val, error);
  if (error != simdjson::NO_SUCH_FIELD) { cerr << "Expected NO_SUCH_FIELD error for uint64_t(doc[\"d\"]), got " << error << endl; return false; }
  return true;
}
@@ -906,11 +907,11 @@ namespace dom_api {
  if (doc["obj"]["a"].as_uint64_t().first != 1) { cerr << "Expected uint64_t(doc[\"obj\"][\"a\"]) to be 1, was " << doc["obj"]["a"].first << endl; return false; }

  document::object obj;
  tie(obj, error) = doc.as_object();
  doc.as_object().tie(obj, error); // tie(...) = fails with "no viable overloaded '='" on Apple clang version 11.0.0
  if (error) { cerr << "Error: " << error << endl; return false; }
  if (obj["obj"]["a"].as_uint64_t().first != 1) { cerr << "Expected uint64_t(doc[\"obj\"][\"a\"]) to be 1, was " << doc["obj"]["a"].first << endl; return false; }

  tie(obj, error) = obj["obj"].as_object();
  obj["obj"].as_object().tie(obj, error); // tie(...) = fails with "no viable overloaded '='" on Apple clang version 11.0.0
  if (obj["a"].as_uint64_t().first != 1) { cerr << "Expected uint64_t(obj[\"a\"]) to be 1, was " << obj["a"].first << endl; return false; }
  if (obj["b"].as_uint64_t().first != 2) { cerr << "Expected uint64_t(obj[\"b\"]) to be 2, was " << obj["b"].first << endl; return false; }
  if (obj["c"].as_uint64_t().first != 3) { cerr << "Expected uint64_t(obj[\"c\"]) to be 3, was " << obj["c"].first << endl; return false; }
@@ -920,7 +921,7 @@ namespace dom_api {
  if (obj["a"].as_uint64_t().first != 1) { cerr << "Expected uint64_t(obj[\"a\"]) to be 1, was " << obj["a"].first << endl; return false; }

  UNUSED document::element val;
  tie(val, error) = doc["d"];
  doc["d"].tie(val, error); // tie(...) = fails with "no viable overloaded '='" on Apple clang version 11.0.0
  if (error != simdjson::NO_SUCH_FIELD) { cerr << "Expected NO_SUCH_FIELD error for uint64_t(obj[\"d\"]), got " << error << endl; return false; }
  return true;
}
@@ -944,14 +945,14 @@ namespace dom_api {
  if (error) { cerr << "Error: " << error << endl; return false; }
  for (auto tweet : tweets) {
    document::object user;
    tie(user, error) = tweet["user"].as_object();
    tweet["user"].as_object().tie(user, error); // tie(...) = fails with "no viable overloaded '='" on Apple clang version 11.0.0
    if (error) { cerr << "Error: " << error << endl; return false; }
    bool default_profile;
    tie(default_profile, error) = user["default_profile"].as_bool();
    user["default_profile"].as_bool().tie(default_profile, error); // tie(...) = fails with "no viable overloaded '='" on Apple clang version 11.0.0
    if (error) { cerr << "Error: " << error << endl; return false; }
    if (default_profile) {
      std::string_view screen_name;
      tie(screen_name, error) = user["screen_name"].as_string();
      user["screen_name"].as_string().tie(screen_name, error); // tie(...) = fails with "no viable overloaded '='" on Apple clang version 11.0.0
      if (error) { cerr << "Error: " << error << endl; return false; }
      default_users.insert(screen_name);
    }
@@ -972,13 +973,13 @@ namespace dom_api {
    if (!not_found) {
      for (auto image : media) {
        document::object sizes;
        tie(sizes, error) = image["sizes"].as_object();
        image["sizes"].as_object().tie(sizes, error); // tie(...) = fails with "no viable overloaded '='" on Apple clang version 11.0.0
        if (error) { cerr << "Error: " << error << endl; return false; }
        for (auto [key, size] : sizes) {
          uint64_t width, height;
          tie(width, error) = size["w"].as_uint64_t();
          size["w"].as_uint64_t().tie(width, error); // tie(...) = fails with "no viable overloaded '='" on Apple clang version 11.0.0
          if (error) { cerr << "Error: " << error << endl; return false; }
          tie(height, error) = size["h"].as_uint64_t();
          size["h"].as_uint64_t().tie(height, error); // tie(...) = fails with "no viable overloaded '='" on Apple clang version 11.0.0
          if (error) { cerr << "Error: " << error << endl; return false; }
          image_sizes.insert(make_pair(width, height));
        }
@@ -1,18 +1,90 @@
#include <iostream>
#ifndef _MSC_VER
#include <dirent.h>
#include <unistd.h>
#endif

#include "simdjson.h"

int main(int argc, char *argv[]) {
  if (argc != 2) {
    std::cerr << "Usage: " << argv[0] << " <jsonfile>\n";
    exit(1);
// Stash the exe_name in main() for functions to use
char* exe_name;

void print_usage(std::ostream& out) {
  out << "Usage: " << exe_name << " [-a ARCH] <jsonfile>" << std::endl;
  out << std::endl;
  out << "Minifies the given JSON file, writing the result to standard output." << std::endl;
  out << std::endl;
  out << "Options:" << std::endl;
  out << std::endl;
  out << "-a IMPL - Use the given parser implementation. By default, detects the most advanced" << std::endl;
  out << "          implementation supported on the host machine." << std::endl;
  for (auto impl : simdjson::available_implementations) {
    out << "-a " << std::left << std::setw(9) << impl->name() << " - Use the " << impl->description() << " parser implementation." << std::endl;
  }
  std::string filename = argv[argc - 1];
}

void exit_usage(std::string message) {
  std::cerr << message << std::endl;
  std::cerr << std::endl;
  print_usage(std::cerr);
  exit(EXIT_FAILURE);
}

struct option_struct {
  char* filename;

  option_struct(int argc, char **argv) {
#ifndef _MSC_VER
    int c;

    while ((c = getopt(argc, argv, "a:")) != -1) {
      switch (c) {
      case 'a': {
        const simdjson::implementation *impl = simdjson::available_implementations[optarg];
        if (!impl) {
          std::string exit_message = std::string("Unsupported option value -a ") + optarg + ": expected -a with one of ";
          for (auto imple : simdjson::available_implementations) {
            exit_message += imple->name();
            exit_message += " ";
          }
          exit_usage(exit_message);
        }
        simdjson::active_implementation = impl;
        break;
      }
      default:
        // reaching here means an argument was given to getopt() which did not have a case label
        exit_usage("Unexpected argument - missing case for option " +
                   std::string(1, static_cast<char>(c)) +
                   " (programming error)");
      }
    }
#else
    int optind = 1;
#endif

    // All remaining arguments are considered to be files
    if (optind + 1 == argc) {
      filename = argv[optind];
    } else {
      exit_usage("Please specify exactly one input file.");
    }
  }
};

int main(int argc, char *argv[]) {
  exe_name = argv[0];
  option_struct options(argc, argv);
  std::string filename = options.filename;
  auto [p, error] = simdjson::padded_string::load(filename);
  if (error) {
    std::cerr << "Could not load the file " << filename << std::endl;
    return EXIT_FAILURE;
  }
  simdjson::json_minify(p, p.data());
  printf("%s", p.data());
  simdjson::padded_string copy(p.length());
  size_t copy_len;
  error = simdjson::active_implementation->minify((const uint8_t*)p.data(), p.length(), (uint8_t*)copy.data(), copy_len);
  if (error) { std::cerr << error << std::endl; return 1; }
  printf("%s", copy.data());
}
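
A hypothetical invocation, assuming the tool is built under the name minify (the build target is not shown in this commit):

./minify -a westmere twitter.json > twitter.min.json

Note that main() prints the legacy json_minify output and then the new implementation->minify output, so the redirected file contains the minified document twice; handy for checking that the two paths agree, but presumably temporary.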