Support cout << json, cout << minify(json)

This commit is contained in:
John Keiser 2020-03-08 17:29:15 -07:00
parent e4e89fe27a
commit acc7bd79b0
8 changed files with 677 additions and 273 deletions

View File

@ -144,29 +144,28 @@ The simplest API to get started is `document::parse()`, which allocates a new pa
```c++ ```c++
auto [doc, error] = document::parse(string("[ 1, 2, 3 ]")); auto [doc, error] = document::parse(string("[ 1, 2, 3 ]"));
if (error) { cerr << "Error: " << error_message(error) << endl; exit(1); } if (error) { cerr << "Error: " << error_message(error) << endl; exit(1); }
doc.print_json(cout); cout << doc;
``` ```
If you're using exceptions, it gets even simpler (simdjson won't use exceptions internally, so you'll only pay the performance cost of exceptions in your own calling code): If you're using exceptions, it gets even simpler (simdjson won't use exceptions internally, so you'll only pay the performance cost of exceptions in your own calling code):
```c++ ```c++
document doc = document::parse(string("[ 1, 2, 3 ]")); document doc = document::parse(string("[ 1, 2, 3 ]"));
doc.print_json(cout); cout << doc;
``` ```
The simdjson library requires SIMDJSON_PADDING extra bytes at the end of a string (it doesn't matter if the bytes are initialized). The `padded_string` class is an easy way to ensure this is accomplished up front and prevent the extra allocation: The simdjson library requires SIMDJSON_PADDING extra bytes at the end of a string (it doesn't matter if the bytes are initialized). The `padded_string` class is an easy way to ensure this is accomplished up front and prevent the extra allocation:
```c++ ```c++
document doc = document::parse(padded_string(string("[ 1, 2, 3 ]"))); document doc = document::parse(padded_string(string("[ 1, 2, 3 ]")));
doc.print_json(cout); cout << doc;
``` ```
You can also load from a file with `parser.load()`: You can also load from a file with `parser.load()`:
```c++ ```c++
document::parser parser; document::parser parser;
document doc = parser.load(filename); cout << parser.load(filename);
doc.print_json(cout);
``` ```
### Reusing the parser for maximum efficiency ### Reusing the parser for maximum efficiency
@ -179,7 +178,7 @@ hot in cache and keeping allocation to a minimum.
document::parser parser; document::parser parser;
for (padded_string json : { string("[1, 2, 3]"), string("true"), string("[ true, false ]") }) { for (padded_string json : { string("[1, 2, 3]"), string("true"), string("[ true, false ]") }) {
document& doc = parser.parse(json); document& doc = parser.parse(json);
doc.print_json(cout); cout << doc;
} }
``` ```
@ -192,7 +191,7 @@ for (int i=0;i<argc;i++) {
auto [doc, error] = parser.parse(get_corpus(argv[i])); auto [doc, error] = parser.parse(get_corpus(argv[i]));
if (error == CAPACITY) { cerr << "JSON files larger than 1MB are not supported!" << endl; exit(1); } if (error == CAPACITY) { cerr << "JSON files larger than 1MB are not supported!" << endl; exit(1); }
if (error) { cerr << error << endl; exit(1); } if (error) { cerr << error << endl; exit(1); }
doc.print_json(cout); cout << doc;
} }
``` ```
@ -208,7 +207,7 @@ for (int i=0;i<argc;i++) {
auto [doc, error] = parser.parse(get_corpus(argv[i])); auto [doc, error] = parser.parse(get_corpus(argv[i]));
if (error == CAPACITY) { cerr << "JSON files larger than 1MB are not supported!" << endl; exit(1); } if (error == CAPACITY) { cerr << "JSON files larger than 1MB are not supported!" << endl; exit(1); }
if (error) { cerr << error << endl; exit(1); } if (error) { cerr << error << endl; exit(1); }
doc.print_json(cout); cout << doc;
} }
``` ```

View File

@ -5,6 +5,7 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include <limits> #include <limits>
#include <sstream>
#include "simdjson/common_defs.h" #include "simdjson/common_defs.h"
#include "simdjson/simdjson.h" #include "simdjson/simdjson.h"
#include "simdjson/padded_string.h" #include "simdjson/padded_string.h"
@ -125,14 +126,6 @@ public:
*/ */
element_result<element> operator[](const char *s) const noexcept; element_result<element> operator[](const char *s) const noexcept;
/**
* Print this JSON to a std::ostream.
*
* @param os the stream to output to.
* @param max_depth the maximum JSON depth to output.
* @return false if the tape is likely wrong (e.g., you did not parse a valid JSON).
*/
bool print_json(std::ostream &os, size_t max_depth=DEFAULT_MAX_DEPTH) const noexcept;
/** /**
* Dump the raw tape for debugging. * Dump the raw tape for debugging.
* *
@ -223,6 +216,8 @@ private:
class tape_ref; class tape_ref;
enum class tape_type; enum class tape_type;
inline error_code set_capacity(size_t len) noexcept; inline error_code set_capacity(size_t len) noexcept;
template<typename T>
friend class minify;
}; // class document }; // class document
/** /**
@ -418,6 +413,7 @@ protected:
really_inline uint64_t tape_value() const noexcept; really_inline uint64_t tape_value() const noexcept;
template<typename T> template<typename T>
really_inline T next_tape_value() const noexcept; really_inline T next_tape_value() const noexcept;
inline std::string_view get_string_view() const noexcept;
/** The document this element references. */ /** The document this element references. */
const document *doc; const document *doc;
@ -426,6 +422,8 @@ protected:
size_t json_index; size_t json_index;
friend class document::key_value_pair; friend class document::key_value_pair;
template<typename T>
friend class minify;
}; };
/** /**
@ -626,6 +624,8 @@ private:
friend class document; friend class document;
template<typename T> template<typename T>
friend class document::element_result; friend class document::element_result;
template<typename T>
friend class minify;
}; };
/** /**
@ -675,6 +675,8 @@ private:
friend class document::element; friend class document::element;
template<typename T> template<typename T>
friend class document::element_result; friend class document::element_result;
template<typename T>
friend class minify;
}; };
/** /**
@ -762,6 +764,8 @@ private:
friend class document::element; friend class document::element;
template<typename T> template<typename T>
friend class document::element_result; friend class document::element_result;
template<typename T>
friend class minify;
}; };
/** /**
@ -831,6 +835,7 @@ public:
inline element_result<array> as_array() const noexcept; inline element_result<array> as_array() const noexcept;
inline element_result<object> as_object() const noexcept; inline element_result<object> as_object() const noexcept;
inline operator element() const noexcept(false);
inline operator bool() const noexcept(false); inline operator bool() const noexcept(false);
inline explicit operator const char*() const noexcept(false); inline explicit operator const char*() const noexcept(false);
inline operator std::string_view() const noexcept(false); inline operator std::string_view() const noexcept(false);
@ -1595,6 +1600,157 @@ private:
friend class document::stream; friend class document::stream;
}; // class parser }; // class parser
/**
 * Stream adaptor that renders a document or element as minified JSON
 * (the smallest possible valid JSON text).
 *
 *   document doc = document::parse(" [ 1 , 2 , 3 ] "_pad);
 *   cout << minify(doc) << endl; // prints [1,2,3]
 *
 * Only a reference to the wrapped value is stored, so the value must stay
 * alive for as long as the minifier is used.
 */
template<typename T>
class minify {
public:
  /**
   * Wrap a value for minified output.
   *
   * @param _value The document or element to minify (held by reference).
   */
  inline minify(const T &_value) noexcept : value{_value} {}

  /**
   * Render the wrapped value to a std::string by streaming it into an
   * in-memory buffer.
   */
  inline operator std::string() const noexcept {
    std::stringstream buffer;
    buffer << *this;
    return buffer.str();
  }

  /**
   * Write the minified JSON to an output stream.
   *
   * @param out The stream to write to.
   */
  inline std::ostream& print(std::ostream& out);

private:
  /** The value being printed; not owned. */
  const T &value;
};
/**
 * Stream-insertion hook for minifiers: delegates to formatter.print(out).
 *
 * @param out The output stream.
 * @param formatter The minifier (taken by value).
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
template<typename T>
inline std::ostream& operator<<(std::ostream& out, minify<T> formatter) {
  std::ostream& result = formatter.print(out);
  return result;
}
/**
 * Print a document to an output stream.
 *
 * By default, the document will be printed minified.
 *
 * @param out The output stream.
 * @param value The document to print.
 * @return The output stream.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, const document &value) { return out << minify(value); }
/**
 * Print an element to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The element to print.
 * @return The output stream.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, const document::element &value) { return out << minify(value); }
/**
 * Print an array to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The array to print.
 * @return The output stream.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, const document::array &value) { return out << minify(value); }
/**
 * Print an object to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The object to print.
 * @return The output stream.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, const document::object &value) { return out << minify(value); }
/**
 * Print a key/value pair to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The key/value pair to print.
 * @return The output stream.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, const document::key_value_pair &value) { return out << minify(value); }
/**
 * Print the document held by a parse result to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The result to print.
 * @return The output stream.
 * @throw simdjson_error if the result being printed has an error. If there is an error with the
 *        underlying output stream, that error will be propagated (simdjson_error will not be
 *        thrown).
 */
inline std::ostream& operator<<(std::ostream& out, const document::doc_result &value) noexcept(false) { return out << minify(value); }
/**
 * Print the document referenced by a parse result to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The result to print.
 * @return The output stream.
 * @throw simdjson_error if the result being printed has an error. If there is an error with the
 *        underlying output stream, that error will be propagated (simdjson_error will not be
 *        thrown).
 */
inline std::ostream& operator<<(std::ostream& out, const document::doc_ref_result &value) noexcept(false) { return out << minify(value); }
/**
 * Print the element held by an element result to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The result to print.
 * @return The output stream.
 * @throw simdjson_error if the result being printed has an error. If there is an error with the
 *        underlying output stream, that error will be propagated (simdjson_error will not be
 *        thrown).
 */
inline std::ostream& operator<<(std::ostream& out, const document::element_result<document::element> &value) noexcept(false) { return out << minify(value); }
/**
 * Print the array held by an element result to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The result to print.
 * @return The output stream.
 * @throw simdjson_error if the result being printed has an error. If there is an error with the
 *        underlying output stream, that error will be propagated (simdjson_error will not be
 *        thrown).
 */
inline std::ostream& operator<<(std::ostream& out, const document::element_result<document::array> &value) noexcept(false) { return out << minify(value); }
/**
 * Print the object held by an element result to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The result to print.
 * @return The output stream.
 * @throw simdjson_error if the result being printed has an error. If there is an error with the
 *        underlying output stream, that error will be propagated (simdjson_error will not be
 *        thrown).
 */
inline std::ostream& operator<<(std::ostream& out, const document::element_result<document::object> &value) noexcept(false) { return out << minify(value); }
} // namespace simdjson } // namespace simdjson
#endif // SIMDJSON_DOCUMENT_H #endif // SIMDJSON_DOCUMENT_H

View File

@ -424,6 +424,7 @@ public:
// print the json to std::ostream (should be valid) // print the json to std::ostream (should be valid)
// return false if the tape is likely wrong (e.g., you did not parse a valid // return false if the tape is likely wrong (e.g., you did not parse a valid
// JSON). // JSON).
/** @deprecated Use cout << parser.parse() */
inline bool print_json(std::ostream &os) const noexcept; inline bool print_json(std::ostream &os) const noexcept;
inline bool dump_raw_tape(std::ostream &os) const noexcept; inline bool dump_raw_tape(std::ostream &os) const noexcept;

View File

@ -136,6 +136,10 @@ inline document::element_result<document::object> document::element_result<docum
return value.as_object(); return value.as_object();
} }
/**
 * Unwrap the result into its element.
 *
 * @throw simdjson_error carrying `error` when the result holds an error.
 */
inline document::element_result<document::element>::operator document::element() const noexcept(false) {
  if (!error) {
    return value;
  }
  throw simdjson_error(error);
}
inline document::element_result<document::element>::operator bool() const noexcept(false) { inline document::element_result<document::element>::operator bool() const noexcept(false) {
return as_bool(); return as_bool();
} }
@ -240,112 +244,6 @@ inline error_code document::set_capacity(size_t capacity) noexcept {
return string_buf && tape ? SUCCESS : MEMALLOC; return string_buf && tape ? SUCCESS : MEMALLOC;
} }
/**
 * Print this document as JSON by scanning the tape from the root entry.
 *
 * Tape entries are visited in order; "," and ":" separators are derived from a
 * per-depth counter, and each scalar or bracket is written as it is met.
 *
 * @param os the stream to output to.
 * @param max_depth the maximum JSON depth to output; sizes the per-depth bookkeeping arrays.
 * @return false if the tape is likely wrong (no root entry, truncated number, unknown entry
 *         type, unbalanced closing bracket) or if the nesting does not fit in max_depth;
 *         true otherwise. Output written before the problem was detected is not rolled back.
 */
inline bool document::print_json(std::ostream &os, size_t max_depth) const noexcept {
  // Bookkeeping slots are used from index 1 upward, so fewer than two slots
  // cannot hold even a root-level value.
  if (max_depth < 2) {
    return false;
  }
  uint32_t string_length;
  size_t tape_idx = 0;
  uint64_t tape_val = tape[tape_idx];
  uint8_t type = (tape_val >> 56);
  size_t how_many = 0;
  if (type == 'r') {
    // The root entry's payload bounds the scan below.
    how_many = tape_val & internal::JSON_VALUE_MASK;
  } else {
    // Error: no starting root node?
    return false;
  }
  tape_idx++;
  std::unique_ptr<bool[]> in_object(new bool[max_depth]);
  std::unique_ptr<size_t[]> in_object_idx(new size_t[max_depth]);
  size_t depth = 1; // only root at level 0
  in_object_idx[depth] = 0;
  in_object[depth] = false;
  for (; tape_idx < how_many; tape_idx++) {
    tape_val = tape[tape_idx];
    uint64_t payload = tape_val & internal::JSON_VALUE_MASK;
    type = (tape_val >> 56);
    if (!in_object[depth]) {
      // Inside an array (or at root): comma before every entry but the first and the closing ].
      if ((in_object_idx[depth] > 0) && (type != ']')) {
        os << ",";
      }
      in_object_idx[depth]++;
    } else { // if (in_object) {
      // Inside an object: even positions are keys (comma-separated), odd positions are values.
      if ((in_object_idx[depth] > 0) && ((in_object_idx[depth] & 1) == 0) &&
          (type != '}')) {
        os << ",";
      }
      if (((in_object_idx[depth] & 1) == 1)) {
        os << ":";
      }
      in_object_idx[depth]++;
    }
    switch (type) {
    case '"': // we have a string
      os << '"';
      memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t));
      internal::print_with_escapes(
          (const unsigned char *)(string_buf.get() + payload + sizeof(uint32_t)),
          os, string_length);
      os << '"';
      break;
    case 'l': // we have a long int
      if (tape_idx + 1 >= how_many) {
        return false;
      }
      os << static_cast<int64_t>(tape[++tape_idx]);
      break;
    case 'u':
      if (tape_idx + 1 >= how_many) {
        return false;
      }
      os << tape[++tape_idx];
      break;
    case 'd': { // we have a double
      if (tape_idx + 1 >= how_many) {
        return false;
      }
      double answer;
      memcpy(&answer, &tape[++tape_idx], sizeof(answer));
      os << answer;
      break;
    }
    case 'n': // we have a null
      os << "null";
      break;
    case 't': // we have a true
      os << "true";
      break;
    case 'f': // we have a false
      os << "false";
      break;
    case '{': // we have an object
      // Refuse to index past the bookkeeping arrays.
      if (depth + 1 >= max_depth) {
        return false;
      }
      os << '{';
      depth++;
      in_object[depth] = true;
      in_object_idx[depth] = 0;
      break;
    case '}': // we end an object
      // A closing bracket with nothing open means the tape is malformed.
      if (depth <= 1) {
        return false;
      }
      depth--;
      os << '}';
      break;
    case '[': // we start an array
      // Refuse to index past the bookkeeping arrays.
      if (depth + 1 >= max_depth) {
        return false;
      }
      os << '[';
      depth++;
      in_object[depth] = false;
      in_object_idx[depth] = 0;
      break;
    case ']': // we end an array
      // A closing bracket with nothing open means the tape is malformed.
      if (depth <= 1) {
        return false;
      }
      depth--;
      os << ']';
      break;
    case 'r': // we start and end with the root node
      // should we be hitting the root node?
      return false;
    default:
      // bug?
      return false;
    }
  }
  return true;
}
inline bool document::dump_raw_tape(std::ostream &os) const noexcept { inline bool document::dump_raw_tape(std::ostream &os) const noexcept {
uint32_t string_length; uint32_t string_length;
size_t tape_idx = 0; size_t tape_idx = 0;
@ -371,10 +269,10 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept {
case '"': // we have a string case '"': // we have a string
os << "string \""; os << "string \"";
memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t));
internal::print_with_escapes( os << internal::escape_json_string(std::string_view(
(const unsigned char *)(string_buf.get() + payload + sizeof(uint32_t)), (const char *)(string_buf.get() + payload + sizeof(uint32_t)),
os, string_length
string_length); ));
os << '"'; os << '"';
os << '\n'; os << '\n';
break; break;
@ -486,7 +384,9 @@ inline bool document::parser::is_valid() const noexcept { return valid; }
inline int document::parser::get_error_code() const noexcept { return error; } inline int document::parser::get_error_code() const noexcept { return error; }
inline std::string document::parser::get_error_message() const noexcept { return error_message(int(error)); } inline std::string document::parser::get_error_message() const noexcept { return error_message(int(error)); }
inline bool document::parser::print_json(std::ostream &os) const noexcept { inline bool document::parser::print_json(std::ostream &os) const noexcept {
return is_valid() ? doc.print_json(os) : false; if (!is_valid()) { return false; }
os << minify(doc);
return true;
} }
inline bool document::parser::dump_raw_tape(std::ostream &os) const noexcept { inline bool document::parser::dump_raw_tape(std::ostream &os) const noexcept {
return is_valid() ? doc.dump_raw_tape(os) : false; return is_valid() ? doc.dump_raw_tape(os) : false;
@ -692,6 +592,15 @@ really_inline T document::tape_ref::next_tape_value() const noexcept {
static_assert(sizeof(T) == sizeof(uint64_t)); static_assert(sizeof(T) == sizeof(uint64_t));
return *reinterpret_cast<const T*>(&doc->tape[json_index + 1]); return *reinterpret_cast<const T*>(&doc->tape[json_index + 1]);
} }
/**
 * View the string this tape entry points at.
 *
 * The tape payload is used as an offset into the document's string buffer,
 * where a uint32_t length is read first and the characters start right after it.
 */
inline std::string_view document::tape_ref::get_string_view() const noexcept {
  const size_t offset = tape_value();
  uint32_t length;
  memcpy(&length, &doc->string_buf[offset], sizeof(length));
  const char *chars = reinterpret_cast<const char *>(&doc->string_buf[offset + sizeof(uint32_t)]);
  return std::string_view(chars, length);
}
// //
// document::array inline implementation // document::array inline implementation
@ -842,15 +751,8 @@ inline document::element_result<const char *> document::element::as_c_str() cons
} }
inline document::element_result<std::string_view> document::element::as_string() const noexcept { inline document::element_result<std::string_view> document::element::as_string() const noexcept {
switch (type()) { switch (type()) {
case tape_type::STRING: { case tape_type::STRING:
size_t string_buf_index = tape_value(); return get_string_view();
uint32_t len;
memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
return std::string_view(
reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]),
len
);
}
default: default:
return INCORRECT_TYPE; return INCORRECT_TYPE;
} }
@ -931,6 +833,195 @@ inline document::element_result<document::element> document::element::operator[]
return obj[key]; return obj[key];
} }
//
// minify inline implementation
//
/**
 * Minify a whole document by minifying its root element.
 */
template<>
inline std::ostream& minify<document>::print(std::ostream& out) {
  // The temporary minifier and the root element it refers to both live until
  // the end of this full expression.
  return minify<document::element>(value.root()).print(out);
}
/**
 * Minify a single element (scalar, array or object) to an output stream.
 *
 * Walks the tape iteratively starting at the element's own tape index, using a
 * fixed-size stack (`is_object`, MAX_DEPTH entries) to remember whether each open
 * container is an object. Containers nested at MAX_DEPTH or deeper are handed
 * to minify<document::array> / minify<document::object> instead of being
 * tracked on the local stack.
 *
 * NOTE(review): numbers are written with `out << value`, so they use whatever
 * formatting state (base, precision) is currently set on `out`.
 *
 * @param out The stream to write to.
 * @return The stream passed in as `out`.
 */
template<>
inline std::ostream& minify<document::element>::print(std::ostream& out) {
using tape_type=document::tape_type;
// Number of containers currently open; 0 means we are at the element itself.
size_t depth = 0;
constexpr size_t MAX_DEPTH = 16;
// is_object[d] records whether the container open at depth d is an object.
bool is_object[MAX_DEPTH];
is_object[0] = false;
// True once a value has been printed in the current container, so the next one needs a comma.
bool after_value = false;
document::tape_ref iter(value.doc, value.json_index);
do {
// print commas after each value
if (after_value) {
out << ",";
}
// If we are in an object, print the next key and :, and skip to the next value.
if (is_object[depth]) {
out << '"' << internal::escape_json_string(iter.get_string_view()) << "\":";
iter.json_index++;
}
switch (iter.type()) {
// Arrays
case tape_type::START_ARRAY: {
// If we're too deep, we need to recurse to go deeper.
depth++;
if (unlikely(depth >= MAX_DEPTH)) {
out << minify<document::array>(document::array(iter.doc, iter.json_index));
// presumably the start entry's payload is the tape index just past the matching ] — TODO confirm
iter.json_index = iter.tape_value() - 1; // Jump to the ]
depth--;
break;
}
// Output start [
out << '[';
iter.json_index++;
// Handle empty [] (we don't want to come back around and print commas)
if (iter.type() == tape_type::END_ARRAY) {
out << ']';
depth--;
break;
}
is_object[depth] = false;
after_value = false;
// Skip the post-value bookkeeping below: nothing has been printed in the new array yet.
continue;
}
// Objects
case tape_type::START_OBJECT: {
// If we're too deep, we need to recurse to go deeper.
depth++;
if (unlikely(depth >= MAX_DEPTH)) {
out << minify<document::object>(document::object(iter.doc, iter.json_index));
// presumably the start entry's payload is the tape index just past the matching } — TODO confirm
iter.json_index = iter.tape_value() - 1; // Jump to the }
depth--;
break;
}
// Output start {
out << '{';
iter.json_index++;
// Handle empty {} (we don't want to come back around and print commas)
if (iter.type() == tape_type::END_OBJECT) {
out << '}';
depth--;
break;
}
is_object[depth] = true;
after_value = false;
// Skip the post-value bookkeeping below: nothing has been printed in the new object yet.
continue;
}
// Scalars
case tape_type::STRING:
out << '"' << internal::escape_json_string(iter.get_string_view()) << '"';
break;
case tape_type::INT64:
out << iter.next_tape_value<int64_t>();
iter.json_index++; // numbers take up 2 spots, so we need to increment extra
break;
case tape_type::UINT64:
out << iter.next_tape_value<uint64_t>();
iter.json_index++; // numbers take up 2 spots, so we need to increment extra
break;
case tape_type::DOUBLE:
out << iter.next_tape_value<double>();
iter.json_index++; // numbers take up 2 spots, so we need to increment extra
break;
case tape_type::TRUE_VALUE:
out << "true";
break;
case tape_type::FALSE_VALUE:
out << "false";
break;
case tape_type::NULL_VALUE:
out << "null";
break;
// These are impossible
case tape_type::END_ARRAY:
case tape_type::END_OBJECT:
case tape_type::ROOT:
abort();
}
// Step past the entry just handled (scalar, or the closing bracket of an empty/delegated container).
iter.json_index++;
after_value = true;
// Handle multiple ends in a row
while (depth != 0 && (iter.type() == tape_type::END_ARRAY || iter.type() == tape_type::END_OBJECT)) {
// presumably the END_ARRAY / END_OBJECT enumerators have the values ']' and '}' — TODO confirm
out << char(iter.type());
depth--;
iter.json_index++;
}
// Stop when we're at depth 0
} while (depth != 0);
return out;
}
/**
 * Minify an object: "{" + comma-separated key/value pairs + "}".
 */
template<>
inline std::ostream& minify<document::object>::print(std::ostream& out) {
  out << '{';
  auto field = value.begin();
  auto last = value.end();
  // A separator goes before every pair except the first one.
  bool need_separator = false;
  for (; field != last; ++field) {
    if (need_separator) {
      out << ",";
    }
    out << minify<document::key_value_pair>(*field);
    need_separator = true;
  }
  out << '}';
  return out;
}
/**
 * Minify an array: "[" + comma-separated elements + "]".
 */
template<>
inline std::ostream& minify<document::array>::print(std::ostream& out) {
  out << '[';
  auto item = value.begin();
  auto last = value.end();
  // A separator goes before every element except the first one.
  bool need_separator = false;
  for (; item != last; ++item) {
    if (need_separator) {
      out << ",";
    }
    out << minify<document::element>(*item);
    need_separator = true;
  }
  out << ']';
  return out;
}
/**
 * Minify a key/value pair as "key":value, with the key JSON-escaped.
 */
template<>
inline std::ostream& minify<document::key_value_pair>::print(std::ostream& out) {
  out << '"';
  out << internal::escape_json_string(value.key);
  out << "\":";
  return out << value.value;
}
/**
 * Minify the document held by a doc_result.
 *
 * @throw simdjson_error carrying the result's error if one is set.
 */
template<>
inline std::ostream& minify<document::doc_result>::print(std::ostream& out) {
  if (!value.error) {
    return out << minify<document>(value.doc);
  }
  throw simdjson_error(value.error);
}
/**
 * Minify the document referenced by a doc_ref_result.
 *
 * @throw simdjson_error carrying the result's error if one is set.
 */
template<>
inline std::ostream& minify<document::doc_ref_result>::print(std::ostream& out) {
  if (!value.error) {
    return out << minify<document>(value.doc);
  }
  throw simdjson_error(value.error);
}
/**
 * Minify the element held by an element_result.
 *
 * @throw simdjson_error carrying the result's error if one is set.
 */
template<>
inline std::ostream& minify<document::element_result<document::element>>::print(std::ostream& out) {
  if (!value.error) {
    return out << minify<document::element>(value.value);
  }
  throw simdjson_error(value.error);
}
/**
 * Minify the array held by an element_result.
 *
 * @throw simdjson_error carrying the result's error if one is set.
 */
template<>
inline std::ostream& minify<document::element_result<document::array>>::print(std::ostream& out) {
  if (!value.error) {
    return out << minify<document::array>(value.value);
  }
  throw simdjson_error(value.error);
}
/**
 * Minify the object held by an element_result.
 *
 * @throw simdjson_error carrying the result's error if one is set.
 */
template<>
inline std::ostream& minify<document::element_result<document::object>>::print(std::ostream& out) {
  if (!value.error) {
    return out << minify<document::object>(value.value);
  }
  throw simdjson_error(value.error);
}
} // namespace simdjson } // namespace simdjson
#endif // SIMDJSON_INLINE_DOCUMENT_H #endif // SIMDJSON_INLINE_DOCUMENT_H

View File

@ -276,7 +276,7 @@ bool document_iterator<max_depth>::print(std::ostream &os, bool escape_strings)
case '"': // we have a string case '"': // we have a string
os << '"'; os << '"';
if (escape_strings) { if (escape_strings) {
internal::print_with_escapes(get_string(), os, get_string_length()); os << internal::escape_json_string(std::string_view(get_string(), get_string_length()));
} else { } else {
// was: os << get_string();, but given that we can include null chars, we // was: os << get_string();, but given that we can include null chars, we
// have to do something crazier: // have to do something crazier:

View File

@ -3,110 +3,59 @@
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <sstream>
namespace simdjson::internal { namespace simdjson::internal {
// ends with zero char class escape_json_string;
static inline void print_with_escapes(const unsigned char *src, std::ostream &os) {
while (*src) { inline std::ostream& operator<<(std::ostream& out, const escape_json_string &str);
switch (*src) {
class escape_json_string {
public:
escape_json_string(std::string_view _str) noexcept : str{_str} {}
operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); }
private:
std::string_view str;
friend std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped);
};
inline std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped) {
for (size_t i=0; i<unescaped.str.length(); i++) {
switch (unescaped.str[i]) {
case '\b': case '\b':
os << '\\'; out << "\\b";
os << 'b';
break; break;
case '\f': case '\f':
os << '\\'; out << "\\f";
os << 'f';
break; break;
case '\n': case '\n':
os << '\\'; out << "\\n";
os << 'n';
break; break;
case '\r': case '\r':
os << '\\'; out << "\\r";
os << 'r';
break; break;
case '\"': case '\"':
os << '\\'; out << "\\\"";
os << '"';
break; break;
case '\t': case '\t':
os << '\\'; out << "\\t";
os << 't';
break; break;
case '\\': case '\\':
os << '\\'; out << "\\\\";
os << '\\';
break; break;
default: default:
if (*src <= 0x1F) { if ((unsigned char)unescaped.str[i] <= 0x1F) {
std::ios::fmtflags f(os.flags()); // TODO can this be done once at the beginning, or will it mess up << char?
os << std::hex << std::setw(4) << std::setfill('0') std::ios::fmtflags f(out.flags());
<< static_cast<int>(*src); out << "\\u" << std::hex << std::setw(4) << std::setfill('0') << static_cast<int>(unescaped.str[i]);
os.flags(f); out.flags(f);
} else { } else {
os << *src; out << unescaped.str[i];
} }
} }
src++;
} }
} return out;
// print len chars
static inline void print_with_escapes(const unsigned char *src,
std::ostream &os, size_t len) {
const unsigned char *finalsrc = src + len;
while (src < finalsrc) {
switch (*src) {
case '\b':
os << '\\';
os << 'b';
break;
case '\f':
os << '\\';
os << 'f';
break;
case '\n':
os << '\\';
os << 'n';
break;
case '\r':
os << '\\';
os << 'r';
break;
case '\"':
os << '\\';
os << '"';
break;
case '\t':
os << '\\';
os << 't';
break;
case '\\':
os << '\\';
os << '\\';
break;
default:
if (*src <= 0x1F) {
std::ios::fmtflags f(os.flags());
os << std::hex << std::setw(4) << std::setfill('0')
<< static_cast<int>(*src);
os.flags(f);
} else {
os << *src;
}
}
src++;
}
}
static inline void print_with_escapes(const char *src, std::ostream &os) {
print_with_escapes(reinterpret_cast<const unsigned char *>(src), os);
}
static inline void print_with_escapes(const char *src, std::ostream &os, size_t len) {
print_with_escapes(reinterpret_cast<const unsigned char *>(src), os, len);
} }
} // namespace simdjson::internal } // namespace simdjson::internal

View File

@ -8,6 +8,7 @@
#include <cmath> #include <cmath>
#include <set> #include <set>
#include <string_view> #include <string_view>
#include <sstream>
#include "simdjson.h" #include "simdjson.h"
@ -260,7 +261,7 @@ static bool parse_json_message_issue467(char const* message, std::size_t len, si
} }
bool json_issue467() { bool json_issue467() {
printf("Running json_issue467.\n"); std::cout << "Running " << __func__ << std::endl;
const char * single_message = "{\"error\":[],\"result\":{\"token\":\"xxx\"}}"; const char * single_message = "{\"error\":[],\"result\":{\"token\":\"xxx\"}}";
const char* two_messages = "{\"error\":[],\"result\":{\"token\":\"xxx\"}}{\"error\":[],\"result\":{\"token\":\"xxx\"}}"; const char* two_messages = "{\"error\":[],\"result\":{\"token\":\"xxx\"}}{\"error\":[],\"result\":{\"token\":\"xxx\"}}";
@ -275,6 +276,7 @@ bool json_issue467() {
// returns true if successful // returns true if successful
bool navigate_test() { bool navigate_test() {
std::cout << "Running " << __func__ << std::endl;
std::string json = "{" std::string json = "{"
"\"Image\": {" "\"Image\": {"
"\"Width\": 800," "\"Width\": 800,"
@ -385,7 +387,7 @@ bool navigate_test() {
// returns true if successful // returns true if successful
bool JsonStream_utf8_test() { bool JsonStream_utf8_test() {
printf("Running JsonStream_utf8_test"); std::cout << "Running " << __func__ << std::endl;
fflush(NULL); fflush(NULL);
const size_t n_records = 10000; const size_t n_records = 10000;
std::string data; std::string data;
@ -446,7 +448,7 @@ bool JsonStream_utf8_test() {
// returns true if successful // returns true if successful
bool JsonStream_test() { bool JsonStream_test() {
printf("Running JsonStream_test"); std::cout << "Running " << __func__ << std::endl;
fflush(NULL); fflush(NULL);
const size_t n_records = 10000; const size_t n_records = 10000;
std::string data; std::string data;
@ -507,7 +509,7 @@ bool JsonStream_test() {
// returns true if successful // returns true if successful
bool document_stream_test() { bool document_stream_test() {
printf("Running document_stream_test"); std::cout << "Running " << __func__ << std::endl;
fflush(NULL); fflush(NULL);
const size_t n_records = 10000; const size_t n_records = 10000;
std::string data; std::string data;
@ -555,7 +557,7 @@ bool document_stream_test() {
// returns true if successful // returns true if successful
bool document_stream_utf8_test() { bool document_stream_utf8_test() {
printf("Running document_stream_utf8_test"); std::cout << "Running " << __func__ << std::endl;
fflush(NULL); fflush(NULL);
const size_t n_records = 10000; const size_t n_records = 10000;
std::string data; std::string data;
@ -603,6 +605,7 @@ bool document_stream_utf8_test() {
// returns true if successful // returns true if successful
bool skyprophet_test() { bool skyprophet_test() {
std::cout << "Running " << __func__ << std::endl;
const size_t n_records = 100000; const size_t n_records = 100000;
std::vector<std::string> data; std::vector<std::string> data;
char buf[1024]; char buf[1024];
@ -658,6 +661,7 @@ namespace dom_api {
using namespace std; using namespace std;
using namespace simdjson; using namespace simdjson;
bool object_iterator() { bool object_iterator() {
std::cout << "Running " << __func__ << std::endl;
string json(R"({ "a": 1, "b": 2, "c": 3 })"); string json(R"({ "a": 1, "b": 2, "c": 3 })");
const char* expected_key[] = { "a", "b", "c" }; const char* expected_key[] = { "a", "b", "c" };
uint64_t expected_value[] = { 1, 2, 3 }; uint64_t expected_value[] = { 1, 2, 3 };
@ -673,6 +677,7 @@ namespace dom_api {
} }
bool array_iterator() { bool array_iterator() {
std::cout << "Running " << __func__ << std::endl;
string json(R"([ 1, 10, 100 ])"); string json(R"([ 1, 10, 100 ])");
uint64_t expected_value[] = { 1, 10, 100 }; uint64_t expected_value[] = { 1, 10, 100 };
int i=0; int i=0;
@ -687,6 +692,7 @@ namespace dom_api {
} }
bool object_iterator_empty() { bool object_iterator_empty() {
std::cout << "Running " << __func__ << std::endl;
string json(R"({})"); string json(R"({})");
int i = 0; int i = 0;
@ -700,6 +706,7 @@ namespace dom_api {
} }
bool array_iterator_empty() { bool array_iterator_empty() {
std::cout << "Running " << __func__ << std::endl;
string json(R"([])"); string json(R"([])");
int i=0; int i=0;
@ -713,6 +720,7 @@ namespace dom_api {
} }
bool string_value() { bool string_value() {
std::cout << "Running " << __func__ << std::endl;
string json(R"([ "hi", "has backslash\\" ])"); string json(R"([ "hi", "has backslash\\" ])");
document doc = document::parse(json); document doc = document::parse(json);
auto val = document::array(doc).begin(); auto val = document::array(doc).begin();
@ -725,6 +733,7 @@ namespace dom_api {
} }
bool numeric_values() { bool numeric_values() {
std::cout << "Running " << __func__ << std::endl;
string json(R"([ 0, 1, -1, 1.1 ])"); string json(R"([ 0, 1, -1, 1.1 ])");
document doc = document::parse(json); document doc = document::parse(json);
auto val = document::array(doc).begin(); auto val = document::array(doc).begin();
@ -744,6 +753,7 @@ namespace dom_api {
} }
bool boolean_values() { bool boolean_values() {
std::cout << "Running " << __func__ << std::endl;
string json(R"([ true, false ])"); string json(R"([ true, false ])");
document doc = document::parse(json); document doc = document::parse(json);
auto val = document::array(doc).begin(); auto val = document::array(doc).begin();
@ -754,6 +764,7 @@ namespace dom_api {
} }
bool null_value() { bool null_value() {
std::cout << "Running " << __func__ << std::endl;
string json(R"([ null ])"); string json(R"([ null ])");
document doc = document::parse(json); document doc = document::parse(json);
auto val = document::array(doc).begin(); auto val = document::array(doc).begin();
@ -762,6 +773,7 @@ namespace dom_api {
} }
bool document_object_index() { bool document_object_index() {
std::cout << "Running " << __func__ << std::endl;
string json(R"({ "a": 1, "b": 2, "c": 3})"); string json(R"({ "a": 1, "b": 2, "c": 3})");
document doc = document::parse(json); document doc = document::parse(json);
if (uint64_t(doc["a"]) != 1) { cerr << "Expected uint64_t(doc[\"a\"]) to be 1, was " << uint64_t(doc["a"]) << endl; return false; } if (uint64_t(doc["a"]) != 1) { cerr << "Expected uint64_t(doc[\"a\"]) to be 1, was " << uint64_t(doc["a"]) << endl; return false; }
@ -778,6 +790,7 @@ namespace dom_api {
} }
bool object_index() { bool object_index() {
std::cout << "Running " << __func__ << std::endl;
string json(R"({ "obj": { "a": 1, "b": 2, "c": 3 } })"); string json(R"({ "obj": { "a": 1, "b": 2, "c": 3 } })");
document doc = document::parse(json); document doc = document::parse(json);
if (uint64_t(doc["obj"]["a"]) != 1) { cerr << "Expected uint64_t(doc[\"obj\"][\"a\"]) to be 1, was " << uint64_t(doc["obj"]["a"]) << endl; return false; } if (uint64_t(doc["obj"]["a"]) != 1) { cerr << "Expected uint64_t(doc[\"obj\"][\"a\"]) to be 1, was " << uint64_t(doc["obj"]["a"]) << endl; return false; }
@ -796,6 +809,7 @@ namespace dom_api {
} }
bool twitter_count() { bool twitter_count() {
std::cout << "Running " << __func__ << std::endl;
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
document doc = document::load(JSON_TEST_PATH); document doc = document::load(JSON_TEST_PATH);
uint64_t result_count = doc["search_metadata"]["count"]; uint64_t result_count = doc["search_metadata"]["count"];
@ -804,6 +818,7 @@ namespace dom_api {
} }
bool twitter_default_profile() { bool twitter_default_profile() {
std::cout << "Running " << __func__ << std::endl;
// Print users with a default profile. // Print users with a default profile.
set<string_view> default_users; set<string_view> default_users;
document doc = document::load(JSON_TEST_PATH); document doc = document::load(JSON_TEST_PATH);
@ -818,6 +833,7 @@ namespace dom_api {
} }
bool twitter_image_sizes() { bool twitter_image_sizes() {
std::cout << "Running " << __func__ << std::endl;
// Print image names and sizes // Print image names and sizes
set<tuple<uint64_t, uint64_t>> image_sizes; set<tuple<uint64_t, uint64_t>> image_sizes;
document doc = document::load(JSON_TEST_PATH); document doc = document::load(JSON_TEST_PATH);
@ -853,7 +869,197 @@ namespace dom_api {
} }
} }
// Tests for JSON output: every printable simdjson value (parse results,
// documents, elements, arrays, objects) is streamed into an ostringstream,
// once directly with operator<< and once wrapped in minify(), and the text is
// compared with the expected minified JSON.
namespace format_tests {
  using namespace simdjson;
  using namespace std;

  // Shared input; written with extra whitespace so that minified output differs from it.
  const padded_string DOCUMENT(string(R"({ "foo" : 1, "bar" : [ 1, 2, 3 ], "baz": { "a": 1, "b": 2, "c": 3 } })"));
  // Expected output when the whole of DOCUMENT is printed.
  const string MINIFIED(R"({"foo":1,"bar":[1,2,3],"baz":{"a":1,"b":2,"c":3}})");

  // Checks the text accumulated in `actual` against `expected`.
  //
  // actual:   stream a test wrote its value into (only read here).
  // expected: exact text required; defaults to the minified whole document.
  // Returns true on an exact match; otherwise reports both strings on cerr and
  // returns false.
  bool assert_minified(const ostringstream &actual, const std::string &expected=MINIFIED) {
    // str() returns a copy of the buffer, so take it once and reuse it for
    // both the comparison and the failure report.
    const string printed = actual.str();
    if (printed == expected) {
      return true;
    }
    cerr << "Failed to correctly minify " << DOCUMENT.data() << endl;
    cerr << "Expected: " << expected << endl;
    cerr << "Actual: " << printed << endl;
    return false;
  }

  // --- Whole document, streamed straight from the value document::parse() returns ---

  bool print_document_parse() {
    std::cout << "Running " << __func__ << std::endl;
    ostringstream out;
    out << document::parse(DOCUMENT);
    return assert_minified(out);
  }
  bool print_minify_document_parse() {
    std::cout << "Running " << __func__ << std::endl;
    ostringstream out;
    out << minify(document::parse(DOCUMENT));
    return assert_minified(out);
  }

  // --- Whole document, streamed straight from the value parser.parse() returns ---

  bool print_parser_parse() {
    std::cout << "Running " << __func__ << std::endl;
    document::parser parser;
    if (!parser.allocate_capacity(DOCUMENT.length())) { cerr << "Couldn't allocate!" << endl; return false; }
    ostringstream out;
    out << parser.parse(DOCUMENT);
    return assert_minified(out);
  }
  bool print_minify_parser_parse() {
    std::cout << "Running " << __func__ << std::endl;
    document::parser parser;
    if (!parser.allocate_capacity(DOCUMENT.length())) { cerr << "Couldn't allocate!" << endl; return false; }
    ostringstream out;
    out << minify(parser.parse(DOCUMENT));
    return assert_minified(out);
  }

  // --- Whole document held in a named `document` value ---

  bool print_document() {
    std::cout << "Running " << __func__ << std::endl;
    document doc = document::parse(DOCUMENT);
    ostringstream out;
    out << doc;
    return assert_minified(out);
  }
  bool print_minify_document() {
    std::cout << "Running " << __func__ << std::endl;
    document doc = document::parse(DOCUMENT);
    ostringstream out;
    out << minify(doc);
    return assert_minified(out);
  }

  // --- Whole document accessed through a const reference obtained from a parser ---

  bool print_document_ref() {
    std::cout << "Running " << __func__ << std::endl;
    document::parser parser;
    if (!parser.allocate_capacity(DOCUMENT.length())) { cerr << "Couldn't allocate!" << endl; return false; }
    const document &doc_ref = parser.parse(DOCUMENT);
    ostringstream out;
    out << doc_ref;
    return assert_minified(out);
  }
  bool print_minify_document_ref() {
    std::cout << "Running " << __func__ << std::endl;
    document::parser parser;
    if (!parser.allocate_capacity(DOCUMENT.length())) { cerr << "Couldn't allocate!" << endl; return false; }
    const document &doc_ref = parser.parse(DOCUMENT);
    ostringstream out;
    out << minify(doc_ref);
    return assert_minified(out);
  }

  // --- Scalar element "foo": first the unconverted value of doc["foo"], then a document::element ---

  bool print_element_result() {
    std::cout << "Running " << __func__ << std::endl;
    const document &doc = document::parse(DOCUMENT);
    ostringstream out;
    out << doc["foo"];
    return assert_minified(out, "1");
  }
  bool print_minify_element_result() {
    std::cout << "Running " << __func__ << std::endl;
    const document &doc = document::parse(DOCUMENT);
    ostringstream out;
    out << minify(doc["foo"]);
    return assert_minified(out, "1");
  }
  bool print_element() {
    std::cout << "Running " << __func__ << std::endl;
    const document &doc = document::parse(DOCUMENT);
    document::element value = doc["foo"];
    ostringstream out;
    out << value;
    return assert_minified(out, "1");
  }
  bool print_minify_element() {
    std::cout << "Running " << __func__ << std::endl;
    const document &doc = document::parse(DOCUMENT);
    document::element value = doc["foo"];
    ostringstream out;
    out << minify(value);
    return assert_minified(out, "1");
  }

  // --- Array "bar": first the unconverted value of as_array(), then a document::array ---

  bool print_array_result() {
    std::cout << "Running " << __func__ << std::endl;
    const document &doc = document::parse(DOCUMENT);
    ostringstream out;
    out << doc["bar"].as_array();
    return assert_minified(out, "[1,2,3]");
  }
  bool print_minify_array_result() {
    std::cout << "Running " << __func__ << std::endl;
    const document &doc = document::parse(DOCUMENT);
    ostringstream out;
    out << minify(doc["bar"].as_array());
    return assert_minified(out, "[1,2,3]");
  }
  bool print_array() {
    std::cout << "Running " << __func__ << std::endl;
    const document &doc = document::parse(DOCUMENT);
    document::array value = doc["bar"];
    ostringstream out;
    out << value;
    return assert_minified(out, "[1,2,3]");
  }
  bool print_minify_array() {
    std::cout << "Running " << __func__ << std::endl;
    const document &doc = document::parse(DOCUMENT);
    document::array value = doc["bar"];
    ostringstream out;
    out << minify(value);
    return assert_minified(out, "[1,2,3]");
  }

  // --- Object "baz": first the unconverted value of as_object(), then a document::object ---

  bool print_object_result() {
    std::cout << "Running " << __func__ << std::endl;
    const document &doc = document::parse(DOCUMENT);
    ostringstream out;
    out << doc["baz"].as_object();
    return assert_minified(out, R"({"a":1,"b":2,"c":3})");
  }
  bool print_minify_object_result() {
    std::cout << "Running " << __func__ << std::endl;
    const document &doc = document::parse(DOCUMENT);
    ostringstream out;
    out << minify(doc["baz"].as_object());
    return assert_minified(out, R"({"a":1,"b":2,"c":3})");
  }
  bool print_object() {
    std::cout << "Running " << __func__ << std::endl;
    const document &doc = document::parse(DOCUMENT);
    document::object value = doc["baz"];
    ostringstream out;
    out << value;
    return assert_minified(out, R"({"a":1,"b":2,"c":3})");
  }
  bool print_minify_object() {
    std::cout << "Running " << __func__ << std::endl;
    const document &doc = document::parse(DOCUMENT);
    document::object value = doc["baz"];
    ostringstream out;
    out << minify(value);
    return assert_minified(out, R"({"a":1,"b":2,"c":3})");
  }

  // Runs every test in this namespace in a fixed order. The && chain stops at
  // the first failing test, so later tests are not run after a failure.
  // Returns true only if all tests passed.
  bool run_tests() {
    return print_document_parse() && print_minify_document_parse() &&
           print_parser_parse() && print_minify_parser_parse() &&
           print_document() && print_minify_document() &&
           print_document_ref() && print_minify_document_ref() &&
           print_element_result() && print_minify_element_result() &&
           print_array_result() && print_minify_array_result() &&
           print_object_result() && print_minify_object_result() &&
           print_element() && print_minify_element() &&
           print_array() && print_minify_array() &&
           print_object() && print_minify_object();
  }
}
bool error_messages_in_correct_order() { bool error_messages_in_correct_order() {
std::cout << "Running " << __func__ << std::endl;
using namespace simdjson; using namespace simdjson;
using namespace simdjson::internal; using namespace simdjson::internal;
using namespace std; using namespace std;
@ -895,6 +1101,8 @@ int main() {
return EXIT_FAILURE; return EXIT_FAILURE;
if (!dom_api::run_tests()) if (!dom_api::run_tests())
return EXIT_FAILURE; return EXIT_FAILURE;
if (!format_tests::run_tests())
return EXIT_FAILURE;
if(!document_stream_test()) if(!document_stream_test())
return EXIT_FAILURE; return EXIT_FAILURE;
if(!document_stream_utf8_test()) if(!document_stream_utf8_test())

View File

@ -9,60 +9,61 @@ void document_parse_error_code() {
string json("[ 1, 2, 3 ]"); string json("[ 1, 2, 3 ]");
auto [doc, error] = document::parse(json); auto [doc, error] = document::parse(json);
if (error) { cerr << "Error: " << error << endl; exit(1); } if (error) { cerr << "Error: " << error << endl; exit(1); }
if (!doc.print_json(cout)) { exit(1); } cout << doc << endl;
cout << endl;
} }
// Example: parse a small JSON array held in a std::string with
// document::parse() and write the result to cout, followed by a newline.
// There is no explicit error check in this function.
// NOTE(review): each line of this span shows both sides of the diff. The
// older side stores the document and calls doc.print_json(cout), exiting with
// status 1 if that returns false; the newer side streams the parse result
// directly with operator<<.
void document_parse_exception() { void document_parse_exception() {
cout << __func__ << endl; cout << __func__ << endl;
string json("[ 1, 2, 3 ]"); string json("[ 1, 2, 3 ]");
document doc = document::parse(json); cout << document::parse(json) << endl;
if (!doc.print_json(cout)) { exit(1); }
cout << endl;
} }
// Example: same as parsing from a std::string, except that the input is first
// wrapped in a padded_string before being handed to document::parse(). The
// result is written to cout, followed by a newline.
// NOTE(review): each line of this span shows both sides of the diff. The
// older side calls doc.print_json(cout) and exits with status 1 on failure;
// the newer side streams the parse result with operator<<.
void document_parse_padded_string() { void document_parse_padded_string() {
cout << __func__ << endl; cout << __func__ << endl;
padded_string json(string("[ 1, 2, 3 ]")); padded_string json(string("[ 1, 2, 3 ]"));
document doc = document::parse(json); cout << document::parse(json) << endl;
if (!doc.print_json(cout)) { exit(1); }
cout << endl;
} }
// Example: obtain the input via get_corpus("jsonexamples/small/demo.json"),
// parse it with document::parse(), and write the result to cout, followed by
// a newline.
// NOTE(review): each line of this span shows both sides of the diff. The
// older side calls doc.print_json(cout) and exits with status 1 on failure;
// the newer side streams the parse result with operator<<.
void document_parse_get_corpus() { void document_parse_get_corpus() {
cout << __func__ << endl; cout << __func__ << endl;
auto json(get_corpus("jsonexamples/small/demo.json")); auto json = get_corpus("jsonexamples/small/demo.json");
document doc = document::parse(json); cout << document::parse(json) << endl;
if (!doc.print_json(cout)) { exit(1); }
cout << endl;
} }
// Example: call document::load() with the path of a JSON file and write the
// result to cout, followed by a newline.
// NOTE(review): each line of this span shows both sides of the diff. The
// older side calls doc.print_json(cout) and exits with status 1 on failure;
// the newer side streams the load result with operator<<.
void document_load() { void document_load() {
cout << __func__ << endl; cout << __func__ << endl;
document doc = document::load("jsonexamples/small/demo.json"); cout << document::load("jsonexamples/small/demo.json") << endl;
if (!doc.print_json(cout)) { exit(1); }
cout << endl;
} }
// Example: reuse one document::parser for several small JSON inputs and print
// each parsed document to cout, followed by a newline.
// NOTE(review): this span fuses both sides of the diff, and they do not line
// up one-to-one:
//   - older side: a single function, parser_parse(), which calls
//     parser.set_capacity(1024*1024) and exits if that reports an error, then
//     checks the error from each parse and prints with doc.print_json(cout);
//   - newer side: parser_parse_error_code(), which checks the error from each
//     parse and prints with operator<<, followed by parser_parse_exception(),
//     which streams parser.parse(json) directly with no explicit error check.
void parser_parse() { void parser_parse_error_code() {
cout << __func__ << endl; cout << __func__ << endl;
// Allocate a parser big enough for all files // Allocate a parser big enough for all files
document::parser parser; document::parser parser;
simdjson::error_code capacity_error = parser.set_capacity(1024*1024);
if (capacity_error) { cerr << "Error setting capacity: " << capacity_error << endl; exit(1); }
// Read files with the parser, one by one // Read files with the parser, one by one
for (padded_string json : { string("[1, 2, 3]"), string("true"), string("[ true, false ]") }) { for (padded_string json : { string("[1, 2, 3]"), string("true"), string("[ true, false ]") }) {
cout << "Parsing " << json.data() << " ..." << endl; cout << "Parsing " << json.data() << " ..." << endl;
auto [doc, error] = parser.parse(json); auto [doc, error] = parser.parse(json);
if (error) { cerr << "Error: " << error << endl; exit(1); } if (error) { cerr << "Error: " << error << endl; exit(1); }
if (!doc.print_json(cout)) { cerr << "print failed!\n"; exit(1); } cout << doc << endl;
cout << endl; }
}
void parser_parse_exception() {
cout << __func__ << endl;
// Allocate a parser big enough for all files
document::parser parser;
// Read files with the parser, one by one
for (padded_string json : { string("[1, 2, 3]"), string("true"), string("[ true, false ]") }) {
cout << "Parsing " << json.data() << " ..." << endl;
cout << parser.parse(json) << endl;
} }
} }
@ -75,8 +76,7 @@ void parser_parse_many_error_code() {
document::parser parser; document::parser parser;
for (auto [doc, error] : parser.parse_many(json)) { for (auto [doc, error] : parser.parse_many(json)) {
if (error) { cerr << "Error: " << error << endl; exit(1); } if (error) { cerr << "Error: " << error << endl; exit(1); }
if (!doc.print_json(cout)) { exit(1); } cout << doc << endl;
cout << endl;
} }
} }
@ -88,8 +88,7 @@ void parser_parse_many_exception() {
cout << "Parsing " << json.data() << " ..." << endl; cout << "Parsing " << json.data() << " ..." << endl;
document::parser parser; document::parser parser;
for (const document &doc : parser.parse_many(json)) { for (const document &doc : parser.parse_many(json)) {
if (!doc.print_json(cout)) { exit(1); } cout << doc << endl;
cout << endl;
} }
} }
@ -101,7 +100,7 @@ void parser_parse_max_capacity() {
auto [doc, error] = parser.parse(argv[i]); auto [doc, error] = parser.parse(argv[i]);
if (error == CAPACITY) { cerr << "JSON files larger than 1MB are not supported!" << endl; exit(1); } if (error == CAPACITY) { cerr << "JSON files larger than 1MB are not supported!" << endl; exit(1); }
if (error) { cerr << error << endl; exit(1); } if (error) { cerr << error << endl; exit(1); }
doc.print_json(cout); cout << doc << endl;
} }
} }
@ -115,7 +114,7 @@ void parser_parse_fixed_capacity() {
auto [doc, error] = parser.parse(argv[i]); auto [doc, error] = parser.parse(argv[i]);
if (error == CAPACITY) { cerr << "JSON files larger than 1MB are not supported!" << endl; exit(1); } if (error == CAPACITY) { cerr << "JSON files larger than 1MB are not supported!" << endl; exit(1); }
if (error) { cerr << error << endl; exit(1); } if (error) { cerr << error << endl; exit(1); }
doc.print_json(cout); cout << doc << endl;
} }
} }
@ -126,7 +125,8 @@ int main() {
document_parse_padded_string(); document_parse_padded_string();
document_parse_get_corpus(); document_parse_get_corpus();
document_load(); document_load();
parser_parse(); parser_parse_error_code();
parser_parse_exception();
parser_parse_many_error_code(); parser_parse_many_error_code();
parser_parse_many_exception(); parser_parse_many_exception();
parser_parse_max_capacity(); parser_parse_max_capacity();