Stop using jsoncharutils.h in JsonStream

This commit is contained in:
John Keiser 2020-03-03 16:09:20 -08:00
parent eb147d9868
commit 5525c6f729
5 changed files with 21 additions and 21 deletions

View File

@ -7,7 +7,6 @@
#include <thread> #include <thread>
#include "simdjson/padded_string.h" #include "simdjson/padded_string.h"
#include "simdjson/simdjson.h" #include "simdjson/simdjson.h"
#include "jsoncharutils.h"
namespace simdjson { namespace simdjson {
@ -233,6 +232,22 @@ template <class string_container> JsonStream<string_container>::~JsonStream() {
#endif #endif
} }
namespace internal {

/**
 * Tell whether a byte is a 7-bit ASCII value.
 *
 * @param c the byte to classify
 * @return true when the byte, read as unsigned, is in the range [0, 127]
 */
static inline bool is_ascii(char c) {
  return static_cast<unsigned char>(c) <= 127;
}

/**
 * Shorten a length so that the byte range [c, c + len) ends on an ASCII byte.
 *
 * Trailing bytes with the high bit set (i.e. bytes belonging to a multi-byte
 * UTF-8 sequence) are dropped one at a time until the last remaining byte is
 * ASCII or nothing is left.
 *
 * @param c   pointer to the first byte of the buffer; only c[0..len-1] is read
 * @param len number of bytes initially considered
 * @return the trimmed length; 0 when the range holds no ASCII byte (or len is 0)
 */
static inline size_t trimmed_length_safe_utf8(const char * c, size_t len) {
  // Use && / ! instead of the alternative tokens `and` / `not`: the latter are
  // not accepted by every compiler mode without pulling in <iso646.h>.
  while ((len > 0) && !is_ascii(c[len - 1])) {
    len--;
  }
  return len;
}

} // namespace internal
#ifdef SIMDJSON_THREADS_ENABLED #ifdef SIMDJSON_THREADS_ENABLED
// threaded version of json_parse // threaded version of json_parse
@ -257,7 +272,7 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
// First time loading // First time loading
if (!stage_1_thread.joinable()) { if (!stage_1_thread.joinable()) {
_batch_size = (std::min)(_batch_size, remaining()); _batch_size = (std::min)(_batch_size, remaining());
_batch_size = trimmed_length_safe_utf8((const char *)buf(), _batch_size); _batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size);
if (_batch_size == 0) { if (_batch_size == 0) {
return parser.error = simdjson::UTF8_ERROR; return parser.error = simdjson::UTF8_ERROR;
} }
@ -291,7 +306,7 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
parser.structural_indexes[find_last_json_buf_idx(buf(), _batch_size, parser)]; parser.structural_indexes[find_last_json_buf_idx(buf(), _batch_size, parser)];
_batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc); _batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc);
if (_batch_size > 0) { if (_batch_size > 0) {
_batch_size = trimmed_length_safe_utf8( _batch_size = internal::trimmed_length_safe_utf8(
(const char *)(buf() + last_json_buffer_loc), _batch_size); (const char *)(buf() + last_json_buffer_loc), _batch_size);
if (_batch_size == 0) { if (_batch_size == 0) {
return parser.error = simdjson::UTF8_ERROR; return parser.error = simdjson::UTF8_ERROR;
@ -343,7 +358,7 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
advance(current_buffer_loc); advance(current_buffer_loc);
n_bytes_parsed += current_buffer_loc; n_bytes_parsed += current_buffer_loc;
_batch_size = (std::min)(_batch_size, remaining()); _batch_size = (std::min)(_batch_size, remaining());
_batch_size = trimmed_length_safe_utf8((const char *)buf(), _batch_size); _batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size);
auto stage1_is_ok = (error_code)simdjson::active_implementation->stage1(buf(), _batch_size, parser, true); auto stage1_is_ok = (error_code)simdjson::active_implementation->stage1(buf(), _batch_size, parser, true);
if (stage1_is_ok != simdjson::SUCCESS) { if (stage1_is_ok != simdjson::SUCCESS) {
return parser.on_error(stage1_is_ok); return parser.on_error(stage1_is_ok);

View File

@ -30,7 +30,6 @@ set(SIMDJSON_SRC_HEADERS
error.cpp error.cpp
implementation.cpp implementation.cpp
isadetection.h isadetection.h
jsoncharutils.h
jsonioutil.cpp jsonioutil.cpp
jsonminifier.cpp jsonminifier.cpp
simdprune_tables.h simdprune_tables.h

View File

@ -6,7 +6,7 @@ namespace simdjson {
const std::map<int, const std::string> error_strings = { const std::map<int, const std::string> error_strings = {
{SUCCESS, "No error"}, {SUCCESS, "No error"},
{SUCCESS_AND_HAS_MORE, "No error and buffer still has more data"}, {SUCCESS_AND_HAS_MORE, "No error and buffer still has more data"},
{CAPACITY, "This ParsedJson can't support a document that big"}, {CAPACITY, "This parser can't support a document that big"},
{MEMALLOC, "Error allocating memory, we're most likely out of memory"}, {MEMALLOC, "Error allocating memory, we're most likely out of memory"},
{TAPE_ERROR, "Something went wrong while writing to the tape"}, {TAPE_ERROR, "Something went wrong while writing to the tape"},
{STRING_ERROR, "Problem while parsing a string"}, {STRING_ERROR, "Problem while parsing a string"},

View File

@ -1,7 +1,7 @@
namespace stage2 { namespace stage2 {
struct streaming_structural_parser: structural_parser { struct streaming_structural_parser: structural_parser {
really_inline streaming_structural_parser(const uint8_t *_buf, size_t _len, ParsedJson &_doc_parser, size_t _i) : structural_parser(_buf, _len, _doc_parser, _i) {} really_inline streaming_structural_parser(const uint8_t *_buf, size_t _len, document::parser &_doc_parser, size_t _i) : structural_parser(_buf, _len, _doc_parser, _i) {}
// override to add streaming // override to add streaming
WARN_UNUSED really_inline error_code start(ret_address finish_parser) { WARN_UNUSED really_inline error_code start(ret_address finish_parser) {

View File

@ -2,7 +2,6 @@
#define SIMDJSON_JSONCHARUTILS_H #define SIMDJSON_JSONCHARUTILS_H
#include "simdjson/common_defs.h" #include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
namespace simdjson { namespace simdjson {
// structural chars here are // structural chars here are
@ -264,19 +263,6 @@ static inline bool is_utf8_continuing(char c) {
// go up to 0b11111 (-1)... so we want all values from -128 to -65 (which is 0b10111111) // go up to 0b11111 (-1)... so we want all values from -128 to -65 (which is 0b10111111)
return ((signed char)c) <= -65; return ((signed char)c) <= -65;
} }
// An ASCII byte is one whose most significant bit is clear (value 0..127).
static inline bool is_ascii(char c) {
  const unsigned char byte = (unsigned char)c;
  return (byte & 0x80u) == 0;
}
// Drops trailing non-ASCII bytes from the range [c, c + len): walks backwards
// from the end and stops at the last ASCII byte. The result is the number of
// bytes kept, which is 0 when no ASCII byte is present.
static inline size_t trimmed_length_safe_utf8(const char * c, size_t len) {
  for (; len != 0; --len) {
    if (is_ascii(c[len - 1])) {
      break;
    }
  }
  return len;
}