Merge branch 'master' into dlemire/better_doxygen_home_page

This commit is contained in:
Daniel Lemire 2020-06-17 16:02:49 -04:00 committed by GitHub
commit 3f00e79bcb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 322 additions and 14 deletions

View File

@ -83,7 +83,7 @@ struct option_struct {
bool stage1_only = false;
int32_t iterations = 200;
int32_t iteration_step = 50;
int32_t iteration_step = -1;
bool verbose = false;
bool tabbed_output = false;
@ -149,6 +149,12 @@ struct option_struct {
}
}
if (iteration_step == -1) {
iteration_step = iterations / 50;
if (iteration_step < 200) { iteration_step = 200; }
if (iteration_step > iterations) { iteration_step = iterations; }
}
// All remaining arguments are considered to be files
for (int i=optind; i<argc; i++) {
files.push_back(argv[i]);
@ -156,11 +162,6 @@ struct option_struct {
if (files.empty()) {
exit_usage("No files specified");
}
// Keeps the numbers the same for CI (old ./parse didn't have a two-stage loop)
if (files.size() == 1) {
iteration_step = iterations;
}
}
};

View File

@ -513,8 +513,8 @@ parser for your CPU, is transparent and thread-safe.
Backwards Compatibility
-----------------------
The only header file supported by simdjson is simdjson.h. Older versions of simdjson published a
number of other include files such as document.h or ParsedJson.h alongside simdjson.h; these headers
The only header file supported by simdjson is `simdjson.h`. Older versions of simdjson published a
number of other include files such as `document.h` or `ParsedJson.h` alongside `simdjson.h`; these headers
may be moved or removed in future versions.

View File

@ -97,6 +97,10 @@ of magnitude cheaper. Ain't that awesome!
Thread support is only active if thread support is detected, in which case the macro
SIMDJSON_THREADS_ENABLED is set. Otherwise the library runs in single-thread mode.
A `document_stream` instance uses at most two threads: there is a main thread and a worker thread.
You should expect the main thread to be fully occupied while the worker thread is partially busy
(e.g., 80% of the time).
Support
-------

View File

@ -11,13 +11,16 @@ are still some scenarios where tuning can enhance performance.
* [Computed GOTOs](#computed-gotos)
* [Number parsing](#number-parsing)
* [Visual Studio](#visual-studio)
* [Downclocking](#downclocking)
Reusing the parser for maximum efficiency
-----------------------------------------
If you're using simdjson to parse multiple documents, or in a loop, you should make a parser once
and reuse it. The simdjson library will allocate and retain internal buffers between parses, keeping
buffers hot in cache and keeping memory allocation and initialization to a minimum.
buffers hot in cache and keeping memory allocation and initialization to a minimum. In this manner,
you can parse terabytes of JSON data without doing any new allocation.
```c++
dom::parser parser;
@ -154,6 +157,19 @@ We do not recommend that you compile simdjson with architecture-specific flags s
Recent versions of Microsoft Visual Studio on Windows provide support for the LLVM Clang compiler. You only need to install the "Clang compiler" optional component. You may also get a copy of the 64-bit LLVM Clang compiler for [Windows directly from LLVM](https://releases.llvm.org/download.html). The simdjson library fully supports the LLVM Clang compiler under Windows. In fact, you may get better performance out of simdjson with the LLVM Clang compiler than with the regular Visual Studio compiler.
Downclocking
--------------
You should not expect the simdjson library to cause downclocking of your recent Intel CPU cores.
On some Intel processors, using SIMD instructions in a sustained manner on the same CPU core may result in a phenomenon called downclocking, whereby the processor initially runs these instructions at a slow speed before reducing the frequency of the core for a short time (milliseconds). Intel refers to these states as licenses. On some current Intel processors, it occurs under two scenarios:
- [Whenever 512-bit AVX-512 instructions are used](https://lemire.me/blog/2018/09/07/avx-512-when-and-how-to-use-these-new-instructions/).
- Whenever heavy 256-bit or wider instructions are used. Heavy instructions are those involving floating point operations or integer multiplications (since these execute on the floating point unit).
The simdjson library does not currently support AVX-512 instructions and it does not make use of heavy 256-bit instructions. Thus there should be no downclocking due to simdjson on recent processors. You may still be worried about which SIMD instruction set is used by simdjson. Thankfully, [you can always determine and change which architecture-specific implementation is used](implementation-selection.md). Thus even if your CPU supports AVX2, you do not need to use AVX2. You are in control.
Further Reading
-------------

View File

@ -73,17 +73,32 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
return is_second_byte ^ is_third_byte ^ is_fourth_byte;
}
really_inline simd8<bool> must_be_2_3_continuation(simd8<uint8_t> prev2, simd8<uint8_t> prev3) {
  // prev2 / prev3 are presumably the bytes two and three positions before the current one
  // (verify against the caller). A lane is set in the first mask when prev2 >= 1110_0000 and
  // in the second mask when prev3 >= 1111_0000.
  simd8<bool> lead_two_back = prev2 >= uint8_t(0b11100000u);
  simd8<bool> lead_three_back = prev3 >= uint8_t(0b11110000u);
  // The masks are combined with XOR, like must_be_continuation does.
  // NOTE(review): a lane where both masks are set comes out false -- confirm this is intended.
  return lead_two_back ^ lead_three_back;
}
#include "generic/stage1/buf_block_reader.h"
#include "generic/stage1/json_string_scanner.h"
#include "generic/stage1/json_scanner.h"
namespace stage1 {
// arm64 definition of json_string_scanner::find_escaped: it always delegates to
// find_escaped_branchless(backslash) and returns its result, with no early-out for blocks
// that contain no backslash.
really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) {
// On ARM, we don't short-circuit this if there are no backslashes, because the branch gives us no
// benefit and therefore makes things worse.
// The line below is the short-circuit that is deliberately left disabled here:
// if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; }
return find_escaped_branchless(backslash);
}
}
#include "generic/stage1/json_minifier.h"
// Minify entry point of this implementation: forwards buf, len, dst and dst_len unchanged to
// arm64::stage1::json_minifier::minify<64> and returns the error_code it produces.
// NOTE(review): the template argument 64 is presumably the step size in bytes -- confirm.
WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
return arm64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
}
#include "generic/stage1/find_next_document_index.h"
#include "generic/stage1/utf8_lookup2_algorithm.h"
#include "generic/stage1/utf8_lookup3_algorithm.h"
#include "generic/stage1/json_structural_indexer.h"
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
this->buf = _buf;

View File

@ -37,7 +37,9 @@ public:
really_inline error_code finish(bool streaming);
private:
// Intended to be defined by the implementation
really_inline uint64_t find_escaped(uint64_t escape);
really_inline uint64_t find_escaped_branchless(uint64_t escape);
// Whether the last iteration was still inside a string (all 1's = true, all 0's = false).
uint64_t prev_in_string = 0ULL;
@ -72,7 +74,7 @@ private:
// desired | x | x x x x x x x x |
// text | \\\ | \\\"\\\" \\\" \\"\\" |
//
really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) {
really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) {
// If there was overflow, pretend the first character isn't a backslash
backslash &= ~prev_escaped;
uint64_t follows_escape = backslash << 1 | prev_escaped;
@ -101,13 +103,23 @@ really_inline json_string_block json_string_scanner::next(const simd::simd8x64<u
const uint64_t backslash = in.eq('\\');
const uint64_t escaped = find_escaped(backslash);
const uint64_t quote = in.eq('"') & ~escaped;
//
// prefix_xor flips on bits inside the string (and flips off the end quote).
//
// Then we xor with prev_in_string: if we were in a string already, its effect is flipped
// (characters inside strings are outside, and characters outside strings are inside).
//
const uint64_t in_string = prefix_xor(quote) ^ prev_in_string;
//
// Check if we're still in a string at the end of the box so the next block will know
//
// right shift of a signed value expected to be well-defined and standard
// compliant as of C++20, John Regehr from Utah U. says this is fine code
//
prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63);
// Use ^ to turn the beginning quote off, and the end quote on.
return {
backslash,

View File

@ -0,0 +1,230 @@
//
// Detect Unicode errors.
//
// UTF-8 is designed to allow multiple bytes and be compatible with ASCII. It's a fairly basic
// encoding that uses the first few bits on each byte to denote a "byte type", and all other bits
// are straight up concatenated into the final value. The first byte of a multibyte character is a
// "leading byte" and starts with N 1's, where N is the total number of bytes (110_____ = 2 byte
// lead). The remaining bytes of a multibyte character all start with 10. 1-byte characters just
// start with 0, because that's what ASCII looks like. Here's what each size looks like:
//
// - ASCII (7 bits): 0_______
// - 2 byte character (11 bits): 110_____ 10______
// - 3 byte character (16 bits): 1110____ 10______ 10______
// - 4 byte character (21 bits): 11110___ 10______ 10______ 10______
// - 5+ byte character (illegal): 11111___ <illegal>
//
// There are 6 classes of error that can happen in Unicode:
//
// - TOO_SHORT: when you have a multibyte character with too few bytes (i.e. missing continuation).
// We detect this by looking for new characters (lead bytes) inside the range of a multibyte
// character.
//
// e.g. 11000000 01100001 (2-byte character where second byte is ASCII)
//
// - TOO_LONG: when there are more bytes in your character than you need (i.e. extra continuation).
// We detect this by requiring that the next byte after your multibyte character be a new
// character--so a continuation after your character is wrong.
//
// e.g. 11011111 10111111 10111111 (2-byte character followed by *another* continuation byte)
//
// - TOO_LARGE: Unicode only goes up to U+10FFFF. These characters are too large.
//
// e.g. 11110111 10111111 10111111 10111111 (bigger than 10FFFF).
//
// - OVERLONG: multibyte characters with a bunch of leading zeroes, where you could have
// used fewer bytes to make the same character. Like encoding an ASCII character in 4 bytes is
// technically possible, but UTF-8 disallows it so that there is only one way to write an "a".
//
// e.g. 11000001 10100001 (2-byte encoding of "a", which only requires 1 byte: 01100001)
//
// - SURROGATE: Unicode U+D800-U+DFFF is a *surrogate* character, reserved for use in UCS-2 and
// WTF-8 encodings for characters with > 2 bytes. These are illegal in pure UTF-8.
//
// e.g. 11101101 10100000 10000000 (U+D800)
//
// - INVALID_5_BYTE: 5-byte, 6-byte, 7-byte and 8-byte characters are unsupported; Unicode does not
// support values with more than 21 bits (which is the most a 4-byte character can encode).
//
// e.g. 11111000 10100000 10000000 10000000 10000000 (U+800000)
//
// Legal utf-8 byte sequences per http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94:
//
// Code Points 1st 2s 3s 4s
// U+0000..U+007F 00..7F
// U+0080..U+07FF C2..DF 80..BF
// U+0800..U+0FFF E0 A0..BF 80..BF
// U+1000..U+CFFF E1..EC 80..BF 80..BF
// U+D000..U+D7FF ED 80..9F 80..BF
// U+E000..U+FFFF EE..EF 80..BF 80..BF
// U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
// U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
// U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
//
using namespace simd;
namespace utf8_validation {
// For a detailed description of the lookup2 algorithm, see the file HACKING.md under "UTF-8 validation (lookup2)".
//
// Find special case UTF-8 errors where the character is technically readable (has the right length)
// but the *value* is disallowed.
//
// This includes overlong encodings, surrogates and values too large for Unicode.
//
// It turns out the bad character ranges can all be detected by looking at the first 12 bits of the
// UTF-8 encoded character (i.e. all of byte 1, and the high 4 bits of byte 2). This algorithm does
// three 4-bit table lookups, identifying which errors each group of 4 bits could match, and then
// &'s them together. If all 3 lookups detect the same error, it's an error.
//
really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
//
// In every lane, `prev1` is treated as byte 1 of a candidate character and `input` as byte 2.
// Each of the three nibbles below is mapped through a 16-entry table to the set of error bits
// that nibble is compatible with. An error bit survives the final AND only when all three
// tables contain it, so the returned vector is nonzero exactly in the lanes that matched an error.
//
// These are the errors we're going to match for bytes 1-2, by looking at the first three
// nibbles of the character: <high bits of byte 1> & <low bits of byte 1> & <high bits of byte 2>
//
static const int OVERLONG_2 = 0x01; // 1100000_ ________ (byte 2 is not inspected: the bit is part of CARRY below)
static const int OVERLONG_3 = 0x02; // 11100000 100_____ ________
static const int OVERLONG_4 = 0x04; // 11110000 1000____ ________ ________
static const int SURROGATE = 0x08; // 11101101 [101_]____
static const int TOO_LARGE = 0x10; // 11110100 (1001|101_)____
static const int TOO_LARGE_2 = 0x20; // 1111(1___|011_|0101) ________ (byte 2 is not inspected: the bit is part of CARRY below)
// New with lookup3. We want to catch the case where a non-continuation
// follows a leading byte
static const int TOO_SHORT_2_3_4 = 0x40; // (110_|1110|1111)____ (0___|11__)____
// We also want to catch a continuation that is preceded by an ASCII byte
static const int LONELY_CONTINUATION = 0x80; // 0_______ 10______
// After processing the rest of byte 1 (the low bits), we're still not done--we have to check
// byte 2 to be sure which things are errors and which aren't.
// In this function byte 2 is `input` and byte 1 is `prev1`.
// CARRY is OR'ed into every byte_2_high entry, so the byte 2 lookup never masks out
// OVERLONG_2 or TOO_LARGE_2: those two are decided by byte 1 alone.
static const int CARRY = OVERLONG_2 | TOO_LARGE_2;
// Table indexed by the high nibble of byte 2.
const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
// ASCII: ________ [0___]____ (high nibbles 0000-0011)
CARRY | TOO_SHORT_2_3_4, CARRY | TOO_SHORT_2_3_4,
CARRY | TOO_SHORT_2_3_4, CARRY | TOO_SHORT_2_3_4,
// ASCII: ________ [0___]____ (high nibbles 0100-0111)
CARRY | TOO_SHORT_2_3_4, CARRY | TOO_SHORT_2_3_4,
CARRY | TOO_SHORT_2_3_4, CARRY | TOO_SHORT_2_3_4,
// Continuations: ________ [10__]____
CARRY | OVERLONG_3 | OVERLONG_4 | LONELY_CONTINUATION, // ________ [1000]____
CARRY | OVERLONG_3 | TOO_LARGE | LONELY_CONTINUATION, // ________ [1001]____
CARRY | TOO_LARGE | SURROGATE | LONELY_CONTINUATION, // ________ [1010]____
CARRY | TOO_LARGE | SURROGATE | LONELY_CONTINUATION, // ________ [1011]____
// Multibyte Leads: ________ [11__]____
CARRY | TOO_SHORT_2_3_4, CARRY | TOO_SHORT_2_3_4, // 110_
CARRY | TOO_SHORT_2_3_4, CARRY | TOO_SHORT_2_3_4 // 1110, 1111
);
// Table indexed by the high nibble of byte 1.
const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
// [0___]____ (ASCII)
LONELY_CONTINUATION, LONELY_CONTINUATION, LONELY_CONTINUATION, LONELY_CONTINUATION,
LONELY_CONTINUATION, LONELY_CONTINUATION, LONELY_CONTINUATION, LONELY_CONTINUATION,
// [10__]____ (continuation)
0, 0, 0, 0,
// [11__]____ (2+-byte leads)
OVERLONG_2 | TOO_SHORT_2_3_4, TOO_SHORT_2_3_4, // [110_]____ (2-byte lead)
OVERLONG_3 | SURROGATE | TOO_SHORT_2_3_4, // [1110]____ (3-byte lead)
OVERLONG_4 | TOO_LARGE | TOO_LARGE_2 | TOO_SHORT_2_3_4 // [1111]____ (4+-byte lead)
);
// Table indexed by the low nibble of byte 1. Every entry carries TOO_SHORT_2_3_4 and
// LONELY_CONTINUATION, so those two errors are decided by the two high-nibble tables only.
const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
// ____[00__] ________
OVERLONG_2 | OVERLONG_3 | OVERLONG_4 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION, // ____[0000] ________
OVERLONG_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION, // ____[0001] ________
TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
// ____[01__] ________
TOO_LARGE | TOO_SHORT_2_3_4 | LONELY_CONTINUATION, // ____[0100] ________
TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
// ____[10__] ________
TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
// ____[11__] ________
TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
TOO_LARGE_2 | SURROGATE | TOO_SHORT_2_3_4 | LONELY_CONTINUATION, // ____[1101] ________
TOO_LARGE_2 | TOO_SHORT_2_3_4| LONELY_CONTINUATION,
TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION
);
// Keep only the error bits that all three nibbles agree on.
return byte_1_high & byte_1_low & byte_2_high;
}
really_inline simd8<uint8_t> check_multibyte_lengths(simd8<uint8_t> input, simd8<uint8_t> prev_input,
simd8<uint8_t> prev1) {
  // Observed: read as signed bytes, 10______ is the range [-128, -65], so the larger of
  // (input, prev1) is below -64 only when both are continuation bytes. This uses one more
  // instruction than lookup2.
  simd8<int8_t> larger_of_pair = simd8<int8_t>(input).max(simd8<int8_t>(prev1));
  simd8<bool> is_2_3_continuation = larger_of_pair < int8_t(-64);
  // Expected: derived from the lead bytes found via prev<2> and prev<3>. This uses two fewer
  // instructions than lookup2.
  simd8<bool> expected = must_be_2_3_continuation(input.prev<2>(prev_input), input.prev<3>(prev_input));
  // Any lane where expectation and observation disagree comes out nonzero.
  return simd8<uint8_t>(expected ^ is_2_3_continuation);
}
//
// Return nonzero if there are incomplete multibyte characters at the end of the block:
// e.g. if there is a 4-byte character, but it's 3 bytes from the end.
//
really_inline simd8<uint8_t> is_incomplete(simd8<uint8_t> input) {
  // Per-lane thresholds. Every lane except the last three holds 255; the last three hold
  // 1111____-1, 111_____-1 and 11______-1, i.e. one below the smallest lead byte that would
  // still need more bytes than remain in the block:
  // ... 1111____ 111_____ 11______
  static const uint8_t thresholds[32] = {
    255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1
  };
  // Load the final sizeof(simd8<uint8_t>) bytes of the table, so the three special
  // thresholds always land in the last three lanes whatever the register width is.
  const auto tail_offset = sizeof(thresholds) - sizeof(simd8<uint8_t>);
  const simd8<uint8_t> max_value(thresholds + tail_offset);
  // NOTE(review): gt_bits presumably yields nonzero lanes where input > max_value -- confirm.
  return input.gt_bits(max_value);
}
struct utf8_checker {
  // Accumulated error bits; nonzero means a UTF-8 error has been seen.
  simd8<uint8_t> error;
  // Last chunk of the most recent input that went through the non-ASCII path.
  simd8<uint8_t> prev_input_block;
  // Result of is_incomplete() on that chunk (consulted by the ASCII fast path and at EOF).
  simd8<uint8_t> prev_incomplete;

  //
  // Validate one chunk, using prev_input as the bytes that precede it.
  //
  really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
    const simd8<uint8_t> prev1 = input.prev<1>(prev_input);
    simd8<uint8_t> value_errors = check_special_cases(input, prev1);
    simd8<uint8_t> length_errors = check_multibyte_lengths(input, prev_input, prev1);
    error |= value_errors;
    error |= length_errors;
  }

  // At EOF the only remaining problem is a multibyte character that was cut short.
  really_inline void check_eof() {
    // Nothing follows, so an unfinished character at the end of the last block is an error.
    error |= prev_incomplete;
  }

  // Validate one 64-byte input, chunk by chunk.
  really_inline void check_next_input(simd8x64<uint8_t> input) {
    if (likely(is_ascii(input))) {
      // An all-ASCII block cannot complete a character left unfinished by the previous block.
      error |= prev_incomplete;
      return;
    }
    // The first chunk continues from the chunk remembered from the previous non-ASCII input;
    // every later chunk continues from its predecessor within this input.
    check_utf8_bytes(input.chunks[0], prev_input_block);
    for (int i=1; i<simd8x64<uint8_t>::NUM_CHUNKS; i++) {
      check_utf8_bytes(input.chunks[i], input.chunks[i-1]);
    }
    // Remember the tail for the next call.
    prev_incomplete = is_incomplete(input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1]);
    prev_input_block = input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1];
  }

  // UTF8_ERROR if any error bit has been accumulated, SUCCESS otherwise.
  really_inline error_code errors() {
    if (error.any_bits_set_anywhere()) {
      return simdjson::UTF8_ERROR;
    }
    return simdjson::SUCCESS;
  }
}; // struct utf8_checker
}
using utf8_validation::utf8_checker;

View File

@ -61,17 +61,32 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
}
really_inline simd8<bool> must_be_2_3_continuation(simd8<uint8_t> prev2, simd8<uint8_t> prev3) {
  // Saturating subtraction leaves a nonzero lane only for 111_____ (prev2) ...
  simd8<uint8_t> lead_two_back = prev2.saturating_sub(0b11100000u-1);
  // ... and only for 1111____ (prev3).
  simd8<uint8_t> lead_three_back = prev3.saturating_sub(0b11110000u-1);
  simd8<uint8_t> either_lead = lead_two_back | lead_three_back;
  // The caller needs a bool mask (all 1's). Each difference is at most 64, so the OR stays
  // positive as a signed byte and a signed "> 0" test is safe.
  return simd8<int8_t>(either_lead) > int8_t(0);
}
#include "generic/stage1/buf_block_reader.h"
#include "generic/stage1/json_string_scanner.h"
#include "generic/stage1/json_scanner.h"
namespace stage1 {
// Definition of json_string_scanner::find_escaped for this implementation: take a cheap
// early exit when the block has no backslash, otherwise run the branchless algorithm.
really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) {
  if (backslash) {
    return find_escaped_branchless(backslash);
  }
  // No backslash in this block: hand back the carried-over prev_escaped value and reset it.
  uint64_t escaped = prev_escaped;
  prev_escaped = 0;
  return escaped;
}
}
#include "generic/stage1/json_minifier.h"
// Minify entry point of this implementation: forwards buf, len, dst and dst_len unchanged to
// haswell::stage1::json_minifier::minify<128> and returns the error_code it produces.
// NOTE(review): the template argument 128 is presumably the step size in bytes -- confirm.
WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len);
}
#include "generic/stage1/find_next_document_index.h"
#include "generic/stage1/utf8_lookup2_algorithm.h"
#include "generic/stage1/utf8_lookup3_algorithm.h"
#include "generic/stage1/json_structural_indexer.h"
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
this->buf = _buf;

View File

@ -62,17 +62,32 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
}
really_inline simd8<bool> must_be_2_3_continuation(simd8<uint8_t> prev2, simd8<uint8_t> prev3) {
  // Saturating subtraction leaves a nonzero lane only for 111_____ (prev2) ...
  simd8<uint8_t> lead_two_back = prev2.saturating_sub(0b11100000u-1);
  // ... and only for 1111____ (prev3).
  simd8<uint8_t> lead_three_back = prev3.saturating_sub(0b11110000u-1);
  simd8<uint8_t> either_lead = lead_two_back | lead_three_back;
  // The caller needs a bool mask (all 1's). Each difference is at most 64, so the OR stays
  // positive as a signed byte and a signed "> 0" test is safe.
  return simd8<int8_t>(either_lead) > int8_t(0);
}
#include "generic/stage1/buf_block_reader.h"
#include "generic/stage1/json_string_scanner.h"
#include "generic/stage1/json_scanner.h"
namespace stage1 {
// Definition of json_string_scanner::find_escaped for this implementation: take a cheap
// early exit when the block has no backslash, otherwise run the branchless algorithm.
really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) {
  if (backslash) {
    return find_escaped_branchless(backslash);
  }
  // No backslash in this block: hand back the carried-over prev_escaped value and reset it.
  uint64_t escaped = prev_escaped;
  prev_escaped = 0;
  return escaped;
}
}
#include "generic/stage1/json_minifier.h"
// Minify entry point of this implementation: forwards buf, len, dst and dst_len unchanged to
// westmere::stage1::json_minifier::minify<64> and returns the error_code it produces.
// NOTE(review): the template argument 64 is presumably the step size in bytes -- confirm.
WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
}
#include "generic/stage1/find_next_document_index.h"
#include "generic/stage1/utf8_lookup2_algorithm.h"
#include "generic/stage1/utf8_lookup3_algorithm.h"
#include "generic/stage1/json_structural_indexer.h"
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
this->buf = _buf;