Merge pull request #895 from simdjson/jkeiser/logger

Add logging to stage 2
This commit is contained in:
John Keiser 2020-05-21 14:11:01 -07:00 committed by GitHub
commit ea842e78af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 150 additions and 16 deletions

View File

@ -9,6 +9,7 @@
namespace simdjson {
namespace arm64 {
#include "generic/stage2/logger.h"
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"

View File

@ -7,6 +7,7 @@
namespace simdjson {
namespace fallback {
#include "generic/stage2/logger.h"
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"

View File

@ -0,0 +1,64 @@
// This is for an internal-only stage 2 specific logger.
// Set LOG_ENABLED = true to log what stage 2 is doing!
namespace logger {
static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------";
static constexpr const bool LOG_ENABLED = false;
static constexpr const int LOG_EVENT_LEN = 30;
static constexpr const int LOG_BUFFER_LEN = 20;
static constexpr const int LOG_DETAIL_LEN = 50;
static int log_depth; // Not threadsafe. Log only.
// Helper to turn unprintable or newline characters into spaces
static really_inline char printable_char(char c) {
if (c >= 0x20) {
return c;
} else {
return ' ';
}
}
// Print the header and set up log_start
static really_inline void log_start() {
if (LOG_ENABLED) {
log_depth = 0;
printf("\n");
printf("| %-*s | %-*s | %*s | %*s | %*s | %-*s |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", 4, "Curr", 4, "Next", 5, "Next#", LOG_DETAIL_LEN, "Detail");
printf("|%.*s|%.*s|%.*s|%.*s|%.*s|%.*s|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, 4+2, DASHES, 4+2, DASHES, 5+2, DASHES, LOG_DETAIL_LEN+2, DASHES);
}
}
// Logs a single line of
template<typename S>
static really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) {
if (LOG_ENABLED) {
printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title);
{
// Print the next N characters in the buffer.
printf("| ");
if (structurals.at_beginning()) {
// If the pointer is at the beginning, print a space followed by the beginning characters
// Print spaces for unprintable or newline characters.
printf(" ");
for (int i=0;i<LOG_BUFFER_LEN-1;i++) {
printf("%c", printable_char(structurals.buf[i]));
}
} else {
// Otherwise, print the characters starting from the buffer position.
// Print spaces for unprintable or newline characters.
for (int i=0;i<LOG_BUFFER_LEN;i++) {
printf("%c", printable_char(structurals.current()[i]));
}
}
printf(" ");
}
printf("| %c ", printable_char(structurals.at_beginning() ? ' ' : structurals.current_char()));
printf("| %c ", printable_char(structurals.peek_char()));
printf("| %5zd ", structurals.next_structural);
printf("| %-*s ", LOG_DETAIL_LEN, detail);
printf("|\n");
}
}
} // namespace logger

View File

@ -5,6 +5,7 @@ struct streaming_structural_parser: structural_parser {
// override to add streaming
WARN_UNUSED really_inline error_code start(UNUSED size_t len, ret_address finish_parser) {
log_start();
init(); // sets is_valid to false
// Capacity ain't no thang for streaming, so we don't check it.
// Advance to the first character as soon as possible
@ -19,16 +20,20 @@ struct streaming_structural_parser: structural_parser {
// override to add streaming
WARN_UNUSED really_inline error_code finish() {
if ( structurals.past_end(doc_parser.n_structural_indexes) ) {
log_error("IMPOSSIBLE: past the end of the JSON!");
return on_error(TAPE_ERROR);
}
end_document();
if (depth != 0) {
log_error("Unclosed objects or arrays!");
return on_error(TAPE_ERROR);
}
if (doc_parser.containing_scope[depth].tape_index != 0) {
log_error("IMPOSSIBLE: root scope tape index did not start at 0!");
return on_error(TAPE_ERROR);
}
bool finished = structurals.at_end(doc_parser.n_structural_indexes);
if (!finished) { log_value("(and has more)"); }
return on_success(finished ? SUCCESS : SUCCESS_AND_HAS_MORE);
}
};
@ -76,6 +81,7 @@ WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, pa
);
goto finish;
default:
parser.log_error("Document starts with a non-value character");
goto error;
}
@ -85,18 +91,19 @@ WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, pa
object_begin:
switch (parser.advance_char()) {
case '"': {
FAIL_IF( parser.parse_string() );
FAIL_IF( parser.parse_string(true) );
goto object_key_parser;
}
case '}':
parser.end_object();
goto scope_end;
default:
parser.log_error("Object does not start with a key");
goto error;
}
object_key_parser:
FAIL_IF( parser.advance_char() != ':' );
if (parser.advance_char() != ':' ) { parser.log_error("Missing colon after key in object"); goto error; }
parser.increment_count();
parser.advance_char();
GOTO( parser.parse_value(addresses, addresses.object_continue) );
@ -104,13 +111,14 @@ object_key_parser:
object_continue:
switch (parser.advance_char()) {
case ',':
FAIL_IF( parser.advance_char() != '"' );
FAIL_IF( parser.parse_string() );
if (parser.advance_char() != '"' ) { parser.log_error("Key string missing at beginning of field in object"); goto error; }
FAIL_IF( parser.parse_string(true) );
goto object_key_parser;
case '}':
parser.end_object();
goto scope_end;
default:
parser.log_error("No comma between object fields");
goto error;
}
@ -142,6 +150,7 @@ array_continue:
parser.end_array();
goto scope_end;
default:
parser.log_error("Missing comma between array values");
goto error;
}

View File

@ -17,6 +17,9 @@ public:
really_inline char current_char() {
return c;
}
really_inline char peek_char() {
return buf[structural_indexes[next_structural]];
}
really_inline const uint8_t* current() {
return &buf[idx];
}
@ -54,6 +57,9 @@ public:
really_inline bool at_end(uint32_t n_structural_indexes) {
return next_structural+1 == n_structural_indexes;
}
really_inline bool at_beginning() {
return next_structural == 0;
}
really_inline size_t next_structural_index() {
return next_structural;
}

View File

@ -90,18 +90,23 @@ struct structural_parser {
write_tape(0, type); // if the document is correct, this gets rewritten later
doc_parser.ret_address[depth] = continue_state;
depth++;
return depth >= doc_parser.max_depth();
bool exceeded_max_depth = depth >= doc_parser.max_depth();
if (exceeded_max_depth) { log_error("Exceeded max depth!"); }
return exceeded_max_depth;
}
WARN_UNUSED really_inline bool start_document(ret_address continue_state) {
log_start_value("document");
return start_scope(internal::tape_type::ROOT, continue_state);
}
WARN_UNUSED really_inline bool start_object(ret_address continue_state) {
log_start_value("object");
return start_scope(internal::tape_type::START_OBJECT, continue_state);
}
WARN_UNUSED really_inline bool start_array(ret_address continue_state) {
log_start_value("array");
return start_scope(internal::tape_type::START_ARRAY, continue_state);
}
@ -121,12 +126,15 @@ struct structural_parser {
}
really_inline void end_object() {
log_end_value("object");
end_scope(internal::tape_type::END_OBJECT);
}
really_inline void end_array() {
log_end_value("array");
end_scope(internal::tape_type::END_ARRAY);
}
really_inline void end_document() {
log_end_value("document");
end_scope(internal::tape_type::ROOT);
}
@ -143,13 +151,12 @@ struct structural_parser {
}
really_inline uint8_t *on_start_string() noexcept {
/* we advance the point, accounting for the fact that we have a NULL
* termination */
// we advance the point, accounting for the fact that we have a NULL termination
write_tape(current_string_buf_loc - doc_parser.doc.string_buf.get(), internal::tape_type::STRING);
return current_string_buf_loc + sizeof(uint32_t);
}
really_inline bool on_end_string(uint8_t *dst) noexcept {
really_inline void on_end_string(uint8_t *dst) noexcept {
uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t)));
// TODO check for overflow in case someone has a crazy string (>=4GB?)
// But only add the overflow check when the document itself exceeds 4GB
@ -159,21 +166,26 @@ struct structural_parser {
// be NULL terminated? It comes at a small cost
*dst = 0;
current_string_buf_loc = dst + 1;
return true;
}
WARN_UNUSED really_inline bool parse_string() {
WARN_UNUSED really_inline bool parse_string(bool key = false) {
log_value(key ? "key" : "string");
uint8_t *dst = on_start_string();
dst = stringparsing::parse_string(structurals.current(), dst);
if (dst == nullptr) {
log_error("Invalid escape in string");
return true;
}
return !on_end_string(dst);
on_end_string(dst);
return false;
}
WARN_UNUSED really_inline bool parse_number(const uint8_t *src, bool found_minus) {
log_value("number");
number_writer writer{doc_parser};
return !numberparsing::parse_number(src, found_minus, writer);
bool succeeded = numberparsing::parse_number(src, found_minus, writer);
if (!succeeded) { log_error("Invalid number"); }
return !succeeded;
}
WARN_UNUSED really_inline bool parse_number(bool found_minus) {
return parse_number(structurals.current(), found_minus);
@ -182,18 +194,22 @@ struct structural_parser {
WARN_UNUSED really_inline bool parse_atom() {
switch (structurals.current_char()) {
case 't':
log_value("true");
if (!atomparsing::is_valid_true_atom(structurals.current())) { return true; }
write_tape(0, internal::tape_type::TRUE_VALUE);
break;
case 'f':
log_value("false");
if (!atomparsing::is_valid_false_atom(structurals.current())) { return true; }
write_tape(0, internal::tape_type::FALSE_VALUE);
break;
case 'n':
log_value("null");
if (!atomparsing::is_valid_null_atom(structurals.current())) { return true; }
write_tape(0, internal::tape_type::NULL_VALUE);
break;
default:
log_error("IMPOSSIBLE: unrecognized parse_atom structural character");
return true;
}
return false;
@ -202,18 +218,22 @@ struct structural_parser {
WARN_UNUSED really_inline bool parse_single_atom() {
switch (structurals.current_char()) {
case 't':
log_value("true");
if (!atomparsing::is_valid_true_atom(structurals.current(), structurals.remaining_len())) { return true; }
write_tape(0, internal::tape_type::TRUE_VALUE);
break;
case 'f':
log_value("false");
if (!atomparsing::is_valid_false_atom(structurals.current(), structurals.remaining_len())) { return true; }
write_tape(0, internal::tape_type::FALSE_VALUE);
break;
case 'n':
log_value("null");
if (!atomparsing::is_valid_null_atom(structurals.current(), structurals.remaining_len())) { return true; }
write_tape(0, internal::tape_type::NULL_VALUE);
break;
default:
log_error("IMPOSSIBLE: unrecognized parse_atom structural character");
return true;
}
return false;
@ -241,6 +261,7 @@ struct structural_parser {
FAIL_IF( start_array(continue_state) );
return addresses.array_begin;
default:
log_error("Non-value found when value was expected!");
return addresses.error;
}
}
@ -248,13 +269,16 @@ struct structural_parser {
WARN_UNUSED really_inline error_code finish() {
// the string might not be NULL terminated.
if ( !structurals.at_end(doc_parser.n_structural_indexes) ) {
log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!");
return on_error(TAPE_ERROR);
}
end_document();
if (depth != 0) {
log_error("Unclosed objects or arrays!");
return on_error(TAPE_ERROR);
}
if (doc_parser.containing_scope[depth].tape_index != 0) {
log_error("IMPOSSIBLE: root scope tape index did not start at 0!");
return on_error(TAPE_ERROR);
}
@ -319,6 +343,7 @@ struct structural_parser {
}
WARN_UNUSED really_inline error_code start(size_t len, ret_address finish_state) {
log_start();
init(); // sets is_valid to false
if (len > doc_parser.capacity()) {
return CAPACITY;
@ -335,6 +360,28 @@ struct structural_parser {
really_inline char advance_char() {
return structurals.advance_char();
}
really_inline void log_value(const char *type) {
logger::log_line(structurals, "", type, "");
}
static really_inline void log_start() {
logger::log_start();
}
really_inline void log_start_value(const char *type) {
logger::log_line(structurals, "+", type, "");
if (logger::LOG_ENABLED) { logger::log_depth++; }
}
really_inline void log_end_value(const char *type) {
if (logger::LOG_ENABLED) { logger::log_depth--; }
logger::log_line(structurals, "-", type, "");
}
really_inline void log_error(const char *error) {
logger::log_line(structurals, "", "ERROR", error);
}
};
// Redefine FAIL_IF to use goto since it'll be used inside the function now
@ -385,6 +432,7 @@ WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, pa
);
goto finish;
default:
parser.log_error("Document starts with a non-value character");
goto error;
}
@ -395,18 +443,19 @@ object_begin:
switch (parser.advance_char()) {
case '"': {
parser.increment_count();
FAIL_IF( parser.parse_string() );
FAIL_IF( parser.parse_string(true) );
goto object_key_state;
}
case '}':
parser.end_object();
goto scope_end;
default:
parser.log_error("Object does not start with a key");
goto error;
}
object_key_state:
FAIL_IF( parser.advance_char() != ':' );
if (parser.advance_char() != ':' ) { parser.log_error("Missing colon after key in object"); goto error; }
parser.advance_char();
GOTO( parser.parse_value(addresses, addresses.object_continue) );
@ -414,13 +463,14 @@ object_continue:
switch (parser.advance_char()) {
case ',':
parser.increment_count();
FAIL_IF( parser.advance_char() != '"' );
FAIL_IF( parser.parse_string() );
if (parser.advance_char() != '"' ) { parser.log_error("Key string missing at beginning of field in object"); goto error; }
FAIL_IF( parser.parse_string(true) );
goto object_key_state;
case '}':
parser.end_object();
goto scope_end;
default:
parser.log_error("No comma between object fields");
goto error;
}
@ -452,6 +502,7 @@ array_continue:
parser.end_array();
goto scope_end;
default:
parser.log_error("Missing comma between array values");
goto error;
}

View File

@ -7,6 +7,7 @@ TARGET_HASWELL
namespace simdjson {
namespace haswell {
#include "generic/stage2/logger.h"
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"

View File

@ -7,6 +7,7 @@ TARGET_WESTMERE
namespace simdjson {
namespace westmere {
#include "generic/stage2/logger.h"
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"