Faster and more correct serialization (#1168)
* Adding new files. * Better. * Fixing minifier and adding tests. * Adding benchmarks. * Including the array header. * Replacing old stream-based code by the new code. * Doubling up the itoa. * Hidden away to_chars in internal namespace. * Removing the repetitions. * Documented the atoi functions. * Tuning the escape sequences. * Moving the operators off the main namespace. * Added more tests. * Tweaking the implementation so that it works with and without exp. * The string_builder template and mini_formatter class are not part of our public API and are subject to change at any time! * Adding a benchmark and some optimization. * Cleaning. * Strictly speaking, this header is needed.
This commit is contained in:
parent
f410213003
commit
60c139a844
|
@ -63,16 +63,136 @@ static void serialize_twitter(State& state) {
|
||||||
bytes += serial.size();
|
bytes += serial.size();
|
||||||
benchmark::DoNotOptimize(serial);
|
benchmark::DoNotOptimize(serial);
|
||||||
}
|
}
|
||||||
|
// we validate the result
|
||||||
|
{
|
||||||
|
auto serial = simdjson::minify(doc);
|
||||||
|
dom::element doc2; // we parse the minified output
|
||||||
|
if ((error = parser.parse(serial).get(doc2))) { throw std::runtime_error("serialization error"); }
|
||||||
|
auto serial2 = simdjson::minify(doc2); // we minify a second time
|
||||||
|
if(serial != serial2) { throw std::runtime_error("serialization mismatch"); }
|
||||||
|
}
|
||||||
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
|
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
|
||||||
state.counters["Gigabytes"] = benchmark::Counter(
|
state.counters["Gigabytes"] = benchmark::Counter(
|
||||||
double(bytes), benchmark::Counter::kIsRate,
|
double(bytes), benchmark::Counter::kIsRate,
|
||||||
benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
|
benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
|
||||||
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
|
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
|
||||||
}
|
}
|
||||||
BENCHMARK(serialize_twitter)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
BENCHMARK(serialize_twitter)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
||||||
return *(std::max_element(std::begin(v), std::end(v)));
|
return *(std::max_element(std::begin(v), std::end(v)));
|
||||||
})->DisplayAggregatesOnly(true);
|
})->DisplayAggregatesOnly(true);
|
||||||
|
|
||||||
|
|
||||||
|
static void serialize_big_string_to_string(State& state) {
|
||||||
|
dom::parser parser;
|
||||||
|
std::vector<char> content;
|
||||||
|
content.push_back('\"');
|
||||||
|
for(size_t i = 0 ; i < 100000; i ++) {
|
||||||
|
content.push_back('0' + char(i%10)); // we add what looks like a long list of digits
|
||||||
|
}
|
||||||
|
content.push_back('\"');
|
||||||
|
dom::element doc;
|
||||||
|
simdjson::error_code error;
|
||||||
|
if ((error = parser.parse(content.data(), content.size()).get(doc))) {
|
||||||
|
cerr << "could not parse big string" << error << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
size_t bytes = 0;
|
||||||
|
for (SIMDJSON_UNUSED auto _ : state) {
|
||||||
|
auto serial = simdjson::to_string(doc);
|
||||||
|
bytes += serial.size();
|
||||||
|
benchmark::DoNotOptimize(serial);
|
||||||
|
}
|
||||||
|
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
|
||||||
|
state.counters["Gigabytes"] = benchmark::Counter(
|
||||||
|
double(bytes), benchmark::Counter::kIsRate,
|
||||||
|
benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
|
||||||
|
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
|
||||||
|
}
|
||||||
|
BENCHMARK(serialize_big_string_to_string)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
||||||
|
return *(std::max_element(std::begin(v), std::end(v)));
|
||||||
|
})->DisplayAggregatesOnly(true);
|
||||||
|
|
||||||
|
|
||||||
|
static void serialize_twitter_to_string(State& state) {
|
||||||
|
dom::parser parser;
|
||||||
|
padded_string docdata;
|
||||||
|
auto error = padded_string::load(TWITTER_JSON).get(docdata);
|
||||||
|
if(error) {
|
||||||
|
cerr << "could not parse twitter.json" << error << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// we do not want mem. alloc. in the loop.
|
||||||
|
if((error = parser.allocate(docdata.size()))) {
|
||||||
|
cout << error << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
dom::element doc;
|
||||||
|
if ((error = parser.parse(docdata).get(doc))) {
|
||||||
|
cerr << "could not parse twitter.json" << error << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
size_t bytes = 0;
|
||||||
|
for (SIMDJSON_UNUSED auto _ : state) {
|
||||||
|
auto serial = simdjson::to_string(doc);
|
||||||
|
bytes += serial.size();
|
||||||
|
benchmark::DoNotOptimize(serial);
|
||||||
|
}
|
||||||
|
// we validate the result
|
||||||
|
{
|
||||||
|
auto serial = simdjson::to_string(doc);
|
||||||
|
dom::element doc2; // we parse the stringify output
|
||||||
|
if ((error = parser.parse(serial).get(doc2))) { throw std::runtime_error("serialization error"); }
|
||||||
|
auto serial2 = simdjson::to_string(doc2); // we stringify again
|
||||||
|
if(serial != serial2) { throw std::runtime_error("serialization mismatch"); }
|
||||||
|
}
|
||||||
|
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
|
||||||
|
state.counters["Gigabytes"] = benchmark::Counter(
|
||||||
|
double(bytes), benchmark::Counter::kIsRate,
|
||||||
|
benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
|
||||||
|
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
|
||||||
|
}
|
||||||
|
BENCHMARK(serialize_twitter_to_string)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
||||||
|
return *(std::max_element(std::begin(v), std::end(v)));
|
||||||
|
})->DisplayAggregatesOnly(true);
|
||||||
|
|
||||||
|
static void serialize_twitter_string_builder(State& state) {
|
||||||
|
dom::parser parser;
|
||||||
|
padded_string docdata;
|
||||||
|
auto error = padded_string::load(TWITTER_JSON).get(docdata);
|
||||||
|
if(error) {
|
||||||
|
cerr << "could not parse twitter.json" << error << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// we do not want mem. alloc. in the loop.
|
||||||
|
if((error = parser.allocate(docdata.size()))) {
|
||||||
|
cout << error << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
dom::element doc;
|
||||||
|
if ((error = parser.parse(docdata).get(doc))) {
|
||||||
|
cerr << "could not parse twitter.json" << error << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
size_t bytes = 0;
|
||||||
|
simdjson::internal::string_builder<> sb;// not part of our public API, for internal use
|
||||||
|
for (SIMDJSON_UNUSED auto _ : state) {
|
||||||
|
sb.clear();
|
||||||
|
sb.append(doc);
|
||||||
|
std::string_view serial = sb.str();
|
||||||
|
bytes += serial.size();
|
||||||
|
benchmark::DoNotOptimize(serial);
|
||||||
|
}
|
||||||
|
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
|
||||||
|
state.counters["Gigabytes"] = benchmark::Counter(
|
||||||
|
double(bytes), benchmark::Counter::kIsRate,
|
||||||
|
benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
|
||||||
|
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
|
||||||
|
}
|
||||||
|
BENCHMARK(serialize_twitter_string_builder)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
||||||
|
return *(std::max_element(std::begin(v), std::end(v)));
|
||||||
|
})->DisplayAggregatesOnly(true);
|
||||||
|
|
||||||
|
|
||||||
static void numbers_scan(State& state) {
|
static void numbers_scan(State& state) {
|
||||||
// Prints the number of results in twitter.json
|
// Prints the number of results in twitter.json
|
||||||
dom::parser parser;
|
dom::parser parser;
|
||||||
|
|
|
@ -43,6 +43,7 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
|
||||||
// Public API
|
// Public API
|
||||||
#include "simdjson/simdjson_version.h"
|
#include "simdjson/simdjson_version.h"
|
||||||
#include "simdjson/error.h"
|
#include "simdjson/error.h"
|
||||||
|
#include "simdjson/minify.h"
|
||||||
#include "simdjson/padded_string.h"
|
#include "simdjson/padded_string.h"
|
||||||
#include "simdjson/implementation.h"
|
#include "simdjson/implementation.h"
|
||||||
#include "simdjson/dom/array.h"
|
#include "simdjson/dom/array.h"
|
||||||
|
@ -51,6 +52,7 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
|
||||||
#include "simdjson/dom/element.h"
|
#include "simdjson/dom/element.h"
|
||||||
#include "simdjson/dom/object.h"
|
#include "simdjson/dom/object.h"
|
||||||
#include "simdjson/dom/parser.h"
|
#include "simdjson/dom/parser.h"
|
||||||
|
#include "simdjson/dom/serialization.h"
|
||||||
|
|
||||||
// Deprecated API
|
// Deprecated API
|
||||||
#include "simdjson/dom/jsonparser.h"
|
#include "simdjson/dom/jsonparser.h"
|
||||||
|
@ -68,6 +70,7 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
|
||||||
#include "simdjson/dom/parsedjson_iterator-inl.h"
|
#include "simdjson/dom/parsedjson_iterator-inl.h"
|
||||||
#include "simdjson/dom/parser-inl.h"
|
#include "simdjson/dom/parser-inl.h"
|
||||||
#include "simdjson/internal/tape_ref-inl.h"
|
#include "simdjson/internal/tape_ref-inl.h"
|
||||||
|
#include "simdjson/dom/serialization-inl.h"
|
||||||
|
|
||||||
SIMDJSON_POP_DISABLE_WARNINGS
|
SIMDJSON_POP_DISABLE_WARNINGS
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,15 @@
|
||||||
|
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
/**
|
||||||
|
* @private
|
||||||
|
* Our own implementation of the C++17 to_chars function.
|
||||||
|
* Defined in src/to_chars
|
||||||
|
*/
|
||||||
|
char *to_chars(char *first, const char *last, double value);
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef SIMDJSON_EXCEPTIONS
|
#ifndef SIMDJSON_EXCEPTIONS
|
||||||
#if __cpp_exceptions
|
#if __cpp_exceptions
|
||||||
#define SIMDJSON_EXCEPTIONS 1
|
#define SIMDJSON_EXCEPTIONS 1
|
||||||
|
|
|
@ -144,39 +144,9 @@ inline bool array::iterator::operator>=(const array::iterator& other) const noex
|
||||||
inline bool array::iterator::operator>(const array::iterator& other) const noexcept {
|
inline bool array::iterator::operator>(const array::iterator& other) const noexcept {
|
||||||
return tape.json_index > other.tape.json_index;
|
return tape.json_index > other.tape.json_index;
|
||||||
}
|
}
|
||||||
inline std::ostream& operator<<(std::ostream& out, const array &value) {
|
|
||||||
return out << minify<array>(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace dom
|
} // namespace dom
|
||||||
|
|
||||||
template<>
|
|
||||||
inline std::ostream& minifier<dom::array>::print(std::ostream& out) {
|
|
||||||
out << '[';
|
|
||||||
auto iter = value.begin();
|
|
||||||
auto end = value.end();
|
|
||||||
if (iter != end) {
|
|
||||||
out << minify<dom::element>(*iter);
|
|
||||||
for (++iter; iter != end; ++iter) {
|
|
||||||
out << "," << minify<dom::element>(*iter);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return out << ']';
|
|
||||||
}
|
|
||||||
|
|
||||||
#if SIMDJSON_EXCEPTIONS
|
|
||||||
|
|
||||||
template<>
|
|
||||||
inline std::ostream& minifier<simdjson_result<dom::array>>::print(std::ostream& out) {
|
|
||||||
if (value.error()) { throw simdjson_error(value.error()); }
|
|
||||||
return out << minify<dom::array>(value.first);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::array> &value) noexcept(false) {
|
|
||||||
return out << minify<simdjson_result<dom::array>>(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
|
|
|
@ -4,10 +4,13 @@
|
||||||
#include "simdjson/common_defs.h"
|
#include "simdjson/common_defs.h"
|
||||||
#include "simdjson/error.h"
|
#include "simdjson/error.h"
|
||||||
#include "simdjson/internal/tape_ref.h"
|
#include "simdjson/internal/tape_ref.h"
|
||||||
#include "simdjson/minify.h"
|
|
||||||
#include <ostream>
|
|
||||||
|
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
template<typename T>
|
||||||
|
class string_builder;
|
||||||
|
}
|
||||||
namespace dom {
|
namespace dom {
|
||||||
|
|
||||||
class document;
|
class document;
|
||||||
|
@ -125,19 +128,9 @@ private:
|
||||||
friend class element;
|
friend class element;
|
||||||
friend struct simdjson_result<element>;
|
friend struct simdjson_result<element>;
|
||||||
template<typename T>
|
template<typename T>
|
||||||
friend class simdjson::minifier;
|
friend class simdjson::internal::string_builder;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
|
||||||
* Print JSON to an output stream.
|
|
||||||
*
|
|
||||||
* By default, the value will be printed minified.
|
|
||||||
*
|
|
||||||
* @param out The output stream.
|
|
||||||
* @param value The value to print.
|
|
||||||
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
|
||||||
*/
|
|
||||||
inline std::ostream& operator<<(std::ostream& out, const array &value);
|
|
||||||
|
|
||||||
} // namespace dom
|
} // namespace dom
|
||||||
|
|
||||||
|
@ -159,20 +152,7 @@ public:
|
||||||
#endif // SIMDJSON_EXCEPTIONS
|
#endif // SIMDJSON_EXCEPTIONS
|
||||||
};
|
};
|
||||||
|
|
||||||
#if SIMDJSON_EXCEPTIONS
|
|
||||||
/**
|
|
||||||
* Print JSON to an output stream.
|
|
||||||
*
|
|
||||||
* By default, the value will be printed minified.
|
|
||||||
*
|
|
||||||
* @param out The output stream.
|
|
||||||
* @param value The value to print.
|
|
||||||
* @throw simdjson_error if the result being printed has an error. If there is an error with the
|
|
||||||
* underlying output stream, that error will be propagated (simdjson_error will not be
|
|
||||||
* thrown).
|
|
||||||
*/
|
|
||||||
inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::array> &value) noexcept(false);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
#define SIMDJSON_DOM_DOCUMENT_H
|
#define SIMDJSON_DOM_DOCUMENT_H
|
||||||
|
|
||||||
#include "simdjson/common_defs.h"
|
#include "simdjson/common_defs.h"
|
||||||
#include "simdjson/minify.h"
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
|
|
||||||
|
@ -67,8 +66,6 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
inline error_code allocate(size_t len) noexcept;
|
inline error_code allocate(size_t len) noexcept;
|
||||||
template<typename T>
|
|
||||||
friend class simdjson::minifier;
|
|
||||||
friend class parser;
|
friend class parser;
|
||||||
}; // class document
|
}; // class document
|
||||||
|
|
||||||
|
|
|
@ -387,9 +387,6 @@ inline bool element::dump_raw_tape(std::ostream &out) const noexcept {
|
||||||
return tape.doc->dump_raw_tape(out);
|
return tape.doc->dump_raw_tape(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::ostream& operator<<(std::ostream& out, const element &value) {
|
|
||||||
return out << minify<element>(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline std::ostream& operator<<(std::ostream& out, element_type type) {
|
inline std::ostream& operator<<(std::ostream& out, element_type type) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
|
@ -416,143 +413,6 @@ inline std::ostream& operator<<(std::ostream& out, element_type type) {
|
||||||
|
|
||||||
} // namespace dom
|
} // namespace dom
|
||||||
|
|
||||||
template<>
|
|
||||||
inline std::ostream& minifier<dom::element>::print(std::ostream& out) {
|
|
||||||
using tape_type=internal::tape_type;
|
|
||||||
size_t depth = 0;
|
|
||||||
constexpr size_t MAX_DEPTH = 16;
|
|
||||||
bool is_object[MAX_DEPTH];
|
|
||||||
is_object[0] = false;
|
|
||||||
bool after_value = false;
|
|
||||||
|
|
||||||
internal::tape_ref iter(value.tape);
|
|
||||||
do {
|
|
||||||
// print commas after each value
|
|
||||||
if (after_value) {
|
|
||||||
out << ",";
|
|
||||||
}
|
|
||||||
// If we are in an object, print the next key and :, and skip to the next value.
|
|
||||||
if (is_object[depth]) {
|
|
||||||
out << '"' << internal::escape_json_string(iter.get_string_view()) << "\":";
|
|
||||||
iter.json_index++;
|
|
||||||
}
|
|
||||||
switch (iter.tape_ref_type()) {
|
|
||||||
|
|
||||||
// Arrays
|
|
||||||
case tape_type::START_ARRAY: {
|
|
||||||
// If we're too deep, we need to recurse to go deeper.
|
|
||||||
depth++;
|
|
||||||
if (simdjson_unlikely(depth >= MAX_DEPTH)) {
|
|
||||||
out << minify<dom::array>(dom::array(iter));
|
|
||||||
iter.json_index = iter.matching_brace_index() - 1; // Jump to the ]
|
|
||||||
depth--;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Output start [
|
|
||||||
out << '[';
|
|
||||||
iter.json_index++;
|
|
||||||
|
|
||||||
// Handle empty [] (we don't want to come back around and print commas)
|
|
||||||
if (iter.tape_ref_type() == tape_type::END_ARRAY) {
|
|
||||||
out << ']';
|
|
||||||
depth--;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
is_object[depth] = false;
|
|
||||||
after_value = false;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Objects
|
|
||||||
case tape_type::START_OBJECT: {
|
|
||||||
// If we're too deep, we need to recurse to go deeper.
|
|
||||||
depth++;
|
|
||||||
if (simdjson_unlikely(depth >= MAX_DEPTH)) {
|
|
||||||
out << minify<dom::object>(dom::object(iter));
|
|
||||||
iter.json_index = iter.matching_brace_index() - 1; // Jump to the }
|
|
||||||
depth--;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Output start {
|
|
||||||
out << '{';
|
|
||||||
iter.json_index++;
|
|
||||||
|
|
||||||
// Handle empty {} (we don't want to come back around and print commas)
|
|
||||||
if (iter.tape_ref_type() == tape_type::END_OBJECT) {
|
|
||||||
out << '}';
|
|
||||||
depth--;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
is_object[depth] = true;
|
|
||||||
after_value = false;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Scalars
|
|
||||||
case tape_type::STRING:
|
|
||||||
out << '"' << internal::escape_json_string(iter.get_string_view()) << '"';
|
|
||||||
break;
|
|
||||||
case tape_type::INT64:
|
|
||||||
out << iter.next_tape_value<int64_t>();
|
|
||||||
iter.json_index++; // numbers take up 2 spots, so we need to increment extra
|
|
||||||
break;
|
|
||||||
case tape_type::UINT64:
|
|
||||||
out << iter.next_tape_value<uint64_t>();
|
|
||||||
iter.json_index++; // numbers take up 2 spots, so we need to increment extra
|
|
||||||
break;
|
|
||||||
case tape_type::DOUBLE:
|
|
||||||
out << iter.next_tape_value<double>();
|
|
||||||
iter.json_index++; // numbers take up 2 spots, so we need to increment extra
|
|
||||||
break;
|
|
||||||
case tape_type::TRUE_VALUE:
|
|
||||||
out << "true";
|
|
||||||
break;
|
|
||||||
case tape_type::FALSE_VALUE:
|
|
||||||
out << "false";
|
|
||||||
break;
|
|
||||||
case tape_type::NULL_VALUE:
|
|
||||||
out << "null";
|
|
||||||
break;
|
|
||||||
|
|
||||||
// These are impossible
|
|
||||||
case tape_type::END_ARRAY:
|
|
||||||
case tape_type::END_OBJECT:
|
|
||||||
case tape_type::ROOT:
|
|
||||||
out << "unexpected content!!!"; // abort() usage is forbidden in the library
|
|
||||||
}
|
|
||||||
iter.json_index++;
|
|
||||||
after_value = true;
|
|
||||||
|
|
||||||
// Handle multiple ends in a row
|
|
||||||
while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY || iter.tape_ref_type() == tape_type::END_OBJECT)) {
|
|
||||||
out << char(iter.tape_ref_type());
|
|
||||||
depth--;
|
|
||||||
iter.json_index++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stop when we're at depth 0
|
|
||||||
} while (depth != 0);
|
|
||||||
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if SIMDJSON_EXCEPTIONS
|
|
||||||
|
|
||||||
template<>
|
|
||||||
simdjson_really_inline std::ostream& minifier<simdjson_result<dom::element>>::print(std::ostream& out) {
|
|
||||||
if (value.error()) { throw simdjson_error(value.error()); }
|
|
||||||
return out << minify<dom::element>(value.first);
|
|
||||||
}
|
|
||||||
|
|
||||||
simdjson_really_inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false) {
|
|
||||||
return out << minify<simdjson_result<dom::element>>(value);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
#endif // SIMDJSON_INLINE_ELEMENT_H
|
#endif // SIMDJSON_INLINE_ELEMENT_H
|
||||||
|
|
|
@ -4,12 +4,14 @@
|
||||||
#include "simdjson/common_defs.h"
|
#include "simdjson/common_defs.h"
|
||||||
#include "simdjson/error.h"
|
#include "simdjson/error.h"
|
||||||
#include "simdjson/internal/tape_ref.h"
|
#include "simdjson/internal/tape_ref.h"
|
||||||
#include "simdjson/minify.h"
|
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
|
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
|
namespace internal {
|
||||||
|
template<typename T>
|
||||||
|
class string_builder;
|
||||||
|
}
|
||||||
namespace dom {
|
namespace dom {
|
||||||
|
|
||||||
class array;
|
class array;
|
||||||
class document;
|
class document;
|
||||||
class object;
|
class object;
|
||||||
|
@ -473,29 +475,10 @@ private:
|
||||||
friend class array;
|
friend class array;
|
||||||
friend struct simdjson_result<element>;
|
friend struct simdjson_result<element>;
|
||||||
template<typename T>
|
template<typename T>
|
||||||
friend class simdjson::minifier;
|
friend class simdjson::internal::string_builder;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
|
||||||
* Print JSON to an output stream.
|
|
||||||
*
|
|
||||||
* By default, the value will be printed minified.
|
|
||||||
*
|
|
||||||
* @param out The output stream.
|
|
||||||
* @param value The value to print.
|
|
||||||
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
|
||||||
*/
|
|
||||||
inline std::ostream& operator<<(std::ostream& out, const element &value);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Print element type to an output stream.
|
|
||||||
*
|
|
||||||
* @param out The output stream.
|
|
||||||
* @param value The value to print.
|
|
||||||
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
|
||||||
*/
|
|
||||||
inline std::ostream& operator<<(std::ostream& out, element_type type);
|
|
||||||
|
|
||||||
} // namespace dom
|
} // namespace dom
|
||||||
|
|
||||||
/** The result of a JSON navigation that may fail. */
|
/** The result of a JSON navigation that may fail. */
|
||||||
|
@ -557,20 +540,6 @@ public:
|
||||||
#endif // SIMDJSON_EXCEPTIONS
|
#endif // SIMDJSON_EXCEPTIONS
|
||||||
};
|
};
|
||||||
|
|
||||||
#if SIMDJSON_EXCEPTIONS
|
|
||||||
/**
|
|
||||||
* Print JSON to an output stream.
|
|
||||||
*
|
|
||||||
* By default, the value will be printed minified.
|
|
||||||
*
|
|
||||||
* @param out The output stream.
|
|
||||||
* @param value The value to print.
|
|
||||||
* @throw simdjson_error if the result being printed has an error. If there is an error with the
|
|
||||||
* underlying output stream, that error will be propagated (simdjson_error will not be
|
|
||||||
* thrown).
|
|
||||||
*/
|
|
||||||
simdjson_really_inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
|
|
|
@ -236,47 +236,8 @@ inline bool object::iterator::key_equals_case_insensitive(std::string_view o) co
|
||||||
inline key_value_pair::key_value_pair(std::string_view _key, element _value) noexcept :
|
inline key_value_pair::key_value_pair(std::string_view _key, element _value) noexcept :
|
||||||
key(_key), value(_value) {}
|
key(_key), value(_value) {}
|
||||||
|
|
||||||
inline std::ostream& operator<<(std::ostream& out, const object &value) {
|
|
||||||
return out << minify<object>(value);
|
|
||||||
}
|
|
||||||
inline std::ostream& operator<<(std::ostream& out, const key_value_pair &value) {
|
|
||||||
return out << minify<key_value_pair>(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace dom
|
} // namespace dom
|
||||||
|
|
||||||
template<>
|
|
||||||
inline std::ostream& minifier<dom::object>::print(std::ostream& out) {
|
|
||||||
out << '{';
|
|
||||||
auto pair = value.begin();
|
|
||||||
auto end = value.end();
|
|
||||||
if (pair != end) {
|
|
||||||
out << minify<dom::key_value_pair>(*pair);
|
|
||||||
for (++pair; pair != end; ++pair) {
|
|
||||||
out << "," << minify<dom::key_value_pair>(*pair);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return out << '}';
|
|
||||||
}
|
|
||||||
|
|
||||||
template<>
|
|
||||||
inline std::ostream& minifier<dom::key_value_pair>::print(std::ostream& out) {
|
|
||||||
return out << '"' << internal::escape_json_string(value.key) << "\":" << value.value;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if SIMDJSON_EXCEPTIONS
|
|
||||||
|
|
||||||
template<>
|
|
||||||
inline std::ostream& minifier<simdjson_result<dom::object>>::print(std::ostream& out) {
|
|
||||||
if (value.error()) { throw simdjson_error(value.error()); }
|
|
||||||
return out << minify<dom::object>(value.first);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::object> &value) noexcept(false) {
|
|
||||||
return out << minify<simdjson_result<dom::object>>(value);
|
|
||||||
}
|
|
||||||
#endif // SIMDJSON_EXCEPTIONS
|
|
||||||
|
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
#if defined(__cpp_lib_ranges)
|
#if defined(__cpp_lib_ranges)
|
||||||
|
|
|
@ -4,10 +4,12 @@
|
||||||
#include "simdjson/common_defs.h"
|
#include "simdjson/common_defs.h"
|
||||||
#include "simdjson/error.h"
|
#include "simdjson/error.h"
|
||||||
#include "simdjson/internal/tape_ref.h"
|
#include "simdjson/internal/tape_ref.h"
|
||||||
#include "simdjson/minify.h"
|
|
||||||
#include <ostream>
|
|
||||||
|
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
|
namespace internal {
|
||||||
|
template<typename T>
|
||||||
|
class string_builder;
|
||||||
|
}
|
||||||
namespace dom {
|
namespace dom {
|
||||||
|
|
||||||
class document;
|
class document;
|
||||||
|
@ -211,7 +213,7 @@ private:
|
||||||
friend class element;
|
friend class element;
|
||||||
friend struct simdjson_result<element>;
|
friend struct simdjson_result<element>;
|
||||||
template<typename T>
|
template<typename T>
|
||||||
friend class simdjson::minifier;
|
friend class simdjson::internal::string_builder;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -229,27 +231,6 @@ private:
|
||||||
friend class object;
|
friend class object;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
|
||||||
* Print JSON to an output stream.
|
|
||||||
*
|
|
||||||
* By default, the value will be printed minified.
|
|
||||||
*
|
|
||||||
* @param out The output stream.
|
|
||||||
* @param value The value to print.
|
|
||||||
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
|
||||||
*/
|
|
||||||
inline std::ostream& operator<<(std::ostream& out, const object &value);
|
|
||||||
/**
|
|
||||||
* Print JSON to an output stream.
|
|
||||||
*
|
|
||||||
* By default, the value will be printed minified.
|
|
||||||
*
|
|
||||||
* @param out The output stream.
|
|
||||||
* @param value The value to print.
|
|
||||||
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
|
||||||
*/
|
|
||||||
inline std::ostream& operator<<(std::ostream& out, const key_value_pair &value);
|
|
||||||
|
|
||||||
} // namespace dom
|
} // namespace dom
|
||||||
|
|
||||||
/** The result of a JSON conversion that may fail. */
|
/** The result of a JSON conversion that may fail. */
|
||||||
|
@ -273,21 +254,6 @@ public:
|
||||||
#endif // SIMDJSON_EXCEPTIONS
|
#endif // SIMDJSON_EXCEPTIONS
|
||||||
};
|
};
|
||||||
|
|
||||||
#if SIMDJSON_EXCEPTIONS
|
|
||||||
/**
|
|
||||||
* Print JSON to an output stream.
|
|
||||||
*
|
|
||||||
* By default, the value will be printed minified.
|
|
||||||
*
|
|
||||||
* @param out The output stream.
|
|
||||||
* @param value The value to print.
|
|
||||||
* @throw simdjson_error if the result being printed has an error. If there is an error with the
|
|
||||||
* underlying output stream, that error will be propagated (simdjson_error will not be
|
|
||||||
* thrown).
|
|
||||||
*/
|
|
||||||
inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::object> &value) noexcept(false);
|
|
||||||
#endif // SIMDJSON_EXCEPTIONS
|
|
||||||
|
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
#if defined(__cpp_lib_ranges)
|
#if defined(__cpp_lib_ranges)
|
||||||
|
|
|
@ -25,11 +25,7 @@ simdjson_really_inline parser &parser::operator=(parser &&other) noexcept = defa
|
||||||
inline bool parser::is_valid() const noexcept { return valid; }
|
inline bool parser::is_valid() const noexcept { return valid; }
|
||||||
inline int parser::get_error_code() const noexcept { return error; }
|
inline int parser::get_error_code() const noexcept { return error; }
|
||||||
inline std::string parser::get_error_message() const noexcept { return error_message(error); }
|
inline std::string parser::get_error_message() const noexcept { return error_message(error); }
|
||||||
inline bool parser::print_json(std::ostream &os) const noexcept {
|
|
||||||
if (!valid) { return false; }
|
|
||||||
os << doc.root();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
inline bool parser::dump_raw_tape(std::ostream &os) const noexcept {
|
inline bool parser::dump_raw_tape(std::ostream &os) const noexcept {
|
||||||
return valid ? doc.dump_raw_tape(os) : false;
|
return valid ? doc.dump_raw_tape(os) : false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,7 +6,6 @@
|
||||||
#include "simdjson/error.h"
|
#include "simdjson/error.h"
|
||||||
#include "simdjson/internal/dom_parser_implementation.h"
|
#include "simdjson/internal/dom_parser_implementation.h"
|
||||||
#include "simdjson/internal/tape_ref.h"
|
#include "simdjson/internal/tape_ref.h"
|
||||||
#include "simdjson/minify.h"
|
|
||||||
#include "simdjson/padded_string.h"
|
#include "simdjson/padded_string.h"
|
||||||
#include "simdjson/portability.h"
|
#include "simdjson/portability.h"
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
|
@ -0,0 +1,421 @@
|
||||||
|
|
||||||
|
#ifndef SIMDJSON_SERIALIZATION_INL_H
|
||||||
|
#define SIMDJSON_SERIALIZATION_INL_H
|
||||||
|
|
||||||
|
#include "simdjson/dom/serialization.h"
|
||||||
|
|
||||||
|
#include <cinttypes>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
namespace dom {
|
||||||
|
inline bool parser::print_json(std::ostream &os) const noexcept {
|
||||||
|
if (!valid) { return false; }
|
||||||
|
simdjson::internal::string_builder<> sb;
|
||||||
|
sb.append(doc.root());
|
||||||
|
std::string_view answer = sb.str();
|
||||||
|
os << answer;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/***
|
||||||
|
* Number utility functions
|
||||||
|
**/
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
/**@private
 * One escape sequence, such as \b or \u0001, stored together with its length.
 * We expect that most compilers will use 8 bytes for this data structure.
 **/
struct escape_sequence {
  // Presumably the number of characters of `string` that make up the
  // sequence -- confirm at the use site.
  uint8_t length;
  // Technically, we only ever need 6 characters; the array is sized so that
  // the whole structure pads to 8 bytes.
  const char string[7];
};
|
||||||
|
/**@private
 * This converts a signed integer into a character sequence.
 * No null terminator is written. The caller is responsible for providing
 * enough memory (at least 20 characters: a sign plus the 19 digits of the
 * most negative 64-bit value).
 * Though various runtime libraries provide itoa functions,
 * it is not part of the C++ standard. The C++17 standard
 * adds the to_chars functions which would do as well, but
 * we want to support C++11.
 *
 * @param output start of the destination buffer.
 * @param value the integer to convert.
 * @return a pointer just past the last character written.
 */
char *fast_itoa(char *output, int64_t value) noexcept {
  // This is a standard implementation of itoa.
  // We first write in reverse order and then reverse.
  //
  // The digits are extracted from the magnitude held in an unsigned integer:
  // negating the most negative int64_t as a signed value overflows, which is
  // undefined behavior, whereas unsigned (modular) negation is well defined
  // and yields the right magnitude.
  uint64_t magnitude = static_cast<uint64_t>(value);
  if (value < 0) {
    *output++ = '-';
    magnitude = uint64_t(0) - magnitude;
  }
  char *write_pointer = output;
  do {
    *write_pointer++ = char('0' + (magnitude % 10));
    magnitude /= 10;
  } while (magnitude != 0);
  // then we reverse the digits (the sign, if any, stays in place)
  char *const answer = write_pointer;
  char *second_write_pointer = output;
  write_pointer -= 1;
  while (second_write_pointer < write_pointer) {
    const char c1 = *write_pointer;
    const char c2 = *second_write_pointer;
    *second_write_pointer = c1;
    *write_pointer = c2;
    write_pointer--;
    second_write_pointer++;
  }
  return answer;
}
|
||||||
|
/**@private
 * Converts an unsigned 64-bit integer into its decimal character sequence.
 *
 * The caller must provide at least 20 writable characters: the longest
 * output is "18446744073709551615" (20 digits). No terminating NUL
 * character is written.
 *
 * Though various runtime libraries provide itoa functions, it is not part
 * of the C++ standard. The C++17 standard adds the to_chars functions which
 * would do as well, but we want to support C++11.
 *
 * @param output Destination buffer.
 * @param value  The integer to print.
 * @return Pointer one past the last character written.
 */
char *fast_itoa(char *output, uint64_t value) noexcept {
  // Emit the digits least-significant first...
  char *cursor = output;
  do {
    *cursor++ = char('0' + (value % 10));
    value /= 10;
  } while (value != 0);
  char *const answer = cursor;
  // ...then reverse them in place.
  for (char *left = output, *right = cursor - 1; left < right; ++left, --right) {
    const char swapped = *left;
    *left = *right;
    *right = swapped;
  }
  return answer;
}
|
||||||
|
} // anonymous namespace
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
/***
|
||||||
|
* Minifier/formatter code.
|
||||||
|
**/
|
||||||
|
|
||||||
|
/** Appends the decimal text of an unsigned 64-bit integer to the buffer. */
simdjson_really_inline void mini_formatter::number(uint64_t x) {
  char digits[24];
  char *const digits_end = fast_itoa(digits, x);
  buffer.insert(buffer.end(), digits, digits_end);
}

/** Appends the decimal text of a signed 64-bit integer to the buffer. */
simdjson_really_inline void mini_formatter::number(int64_t x) {
  char digits[24];
  char *const digits_end = fast_itoa(digits, x);
  buffer.insert(buffer.end(), digits, digits_end);
}

/** Appends the text form of a double, as produced by internal::to_chars. */
simdjson_really_inline void mini_formatter::number(double x) {
  // NOTE(review): 24 bytes is assumed to be enough for any to_chars output;
  // confirm against the to_chars implementation.
  char digits[24];
  // Passing nullptr as the end-of-buffer argument is currently safe because
  // our to_chars implementation does not check that argument.
  char *const digits_end = internal::to_chars(digits, nullptr, x);
  buffer.insert(buffer.end(), digits, digits_end);
}
|
||||||
|
|
||||||
|
/** Opens an array: appends '['. */
simdjson_really_inline void mini_formatter::start_array() {
  one_char('[');
}

/** Closes an array: appends ']'. */
simdjson_really_inline void mini_formatter::end_array() {
  one_char(']');
}

/** Opens an object: appends '{'. */
simdjson_really_inline void mini_formatter::start_object() {
  one_char('{');
}

/** Closes an object: appends '}'. */
simdjson_really_inline void mini_formatter::end_object() {
  one_char('}');
}

/** Separates two values or members: appends ','. */
simdjson_really_inline void mini_formatter::comma() {
  one_char(',');
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Appends the literal true. */
simdjson_really_inline void mini_formatter::true_atom() {
  constexpr char text[] = "true";
  // sizeof counts the terminating NUL, which must not be copied.
  buffer.insert(buffer.end(), text, text + sizeof(text) - 1);
}

/** Appends the literal false. */
simdjson_really_inline void mini_formatter::false_atom() {
  constexpr char text[] = "false";
  buffer.insert(buffer.end(), text, text + sizeof(text) - 1);
}

/** Appends the literal null. */
simdjson_really_inline void mini_formatter::null_atom() {
  constexpr char text[] = "null";
  buffer.insert(buffer.end(), text, text + sizeof(text) - 1);
}
|
||||||
|
/** Appends a single character to the output buffer. */
simdjson_really_inline void mini_formatter::one_char(char c) {
  buffer.push_back(c);
}

/** Appends an object key: the escaped, quoted string followed by ':'. */
simdjson_really_inline void mini_formatter::key(std::string_view unescaped) {
  string(unescaped);
  one_char(':');
}
|
||||||
|
/**
 * Appends `unescaped` as a quoted JSON string, escaping the double quote,
 * the backslash and every control character (bytes 0x00 to 0x1F).
 */
simdjson_really_inline void mini_formatter::string(std::string_view unescaped) {
  one_char('\"');
  // Lookup table indexed by byte value: 1 when the byte must be escaped.
  // Laid out as 16 rows of 16 entries; only rows 0-1 (control characters),
  // entry 34 ('"') and entry 92 ('\\') are set.
  constexpr static bool needs_escaping[] = {
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
      0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 ('"' is 0x22)
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x50 ('\\' is 0x5C)
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // 0xF0
  };
  const size_t length = unescaped.length();
  size_t i = 0;
  // Fast path for the case where we have no control character, no ", and no
  // backslash (this should include most keys). We test eight bytes per
  // iteration: a poor man's vectorization that SIMD could make much faster.
  while (i + 8 <= length) {
    if (needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i + 1])]
        | needs_escaping[uint8_t(unescaped[i + 2])] | needs_escaping[uint8_t(unescaped[i + 3])]
        | needs_escaping[uint8_t(unescaped[i + 4])] | needs_escaping[uint8_t(unescaped[i + 5])]
        | needs_escaping[uint8_t(unescaped[i + 6])] | needs_escaping[uint8_t(unescaped[i + 7])]) {
      break;
    }
    i += 8;
  }
  // Byte-by-byte scan up to the first character that needs escaping.
  while (i < length && !needs_escaping[uint8_t(unescaped[i])]) {
    i++;
  }
  // The following is also possible and omits a 256-byte table, but it is slower:
  // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F)
  //      && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {}

  // Copy the clean prefix in one go. At least for long strings this should be
  // fast; we could do better by integrating the checks and the insertion.
  buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i);

  // Slow path, entered only when something needed escaping. We do not restart
  // from the beginning: we continue from the first offending character.
  for (; i < length; i++) {
    const char c = unescaped[i];
    if (c == '\"') {
      const char *s = "\\\"";
      buffer.insert(buffer.end(), s, s + 2);
    } else if (c == '\\') {
      const char *s = "\\\\";
      buffer.insert(buffer.end(), s, s + 2);
    } else if (uint8_t(c) <= 0x1F) {
      // If packed, this table uses 8 * 32 bytes. We expect most compilers to
      // place it in the data section.
      constexpr static escape_sequence escaped[32] = {
          {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"},
          {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"},
          {2, "\\b"},     {2, "\\t"},     {2, "\\n"},     {6, "\\u000b"},
          {2, "\\f"},     {2, "\\r"},     {6, "\\u000e"}, {6, "\\u000f"},
          {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"},
          {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"},
          {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"},
          {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}};
      const escape_sequence &sequence = escaped[uint8_t(c)];
      buffer.insert(buffer.end(), sequence.string, sequence.string + sequence.length);
    } else {
      one_char(c);
    }
  }
  one_char('\"');
}
|
||||||
|
|
||||||
|
inline void mini_formatter::clear() {
|
||||||
|
buffer.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
simdjson_really_inline std::string_view mini_formatter::str() const {
|
||||||
|
return std::string_view(buffer.data(), buffer.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/***
|
||||||
|
* String building code.
|
||||||
|
**/
|
||||||
|
|
||||||
|
template <class serializer>
|
||||||
|
inline void string_builder<serializer>::append(simdjson::dom::element value) {
|
||||||
|
// using tape_type = simdjson::internal::tape_type;
|
||||||
|
size_t depth = 0;
|
||||||
|
constexpr size_t MAX_DEPTH = 16;
|
||||||
|
bool is_object[MAX_DEPTH];
|
||||||
|
is_object[0] = false;
|
||||||
|
bool after_value = false;
|
||||||
|
|
||||||
|
internal::tape_ref iter(value.tape);
|
||||||
|
do {
|
||||||
|
// print commas after each value
|
||||||
|
if (after_value) {
|
||||||
|
format.comma();
|
||||||
|
}
|
||||||
|
// If we are in an object, print the next key and :, and skip to the next
|
||||||
|
// value.
|
||||||
|
if (is_object[depth]) {
|
||||||
|
format.key(iter.get_string_view());
|
||||||
|
iter.json_index++;
|
||||||
|
}
|
||||||
|
switch (iter.tape_ref_type()) {
|
||||||
|
|
||||||
|
// Arrays
|
||||||
|
case tape_type::START_ARRAY: {
|
||||||
|
// If we're too deep, we need to recurse to go deeper.
|
||||||
|
depth++;
|
||||||
|
if (simdjson_unlikely(depth >= MAX_DEPTH)) {
|
||||||
|
append(simdjson::dom::array(iter));
|
||||||
|
iter.json_index = iter.matching_brace_index() - 1; // Jump to the ]
|
||||||
|
depth--;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output start [
|
||||||
|
format.start_array();
|
||||||
|
iter.json_index++;
|
||||||
|
|
||||||
|
// Handle empty [] (we don't want to come back around and print commas)
|
||||||
|
if (iter.tape_ref_type() == tape_type::END_ARRAY) {
|
||||||
|
format.end_array();
|
||||||
|
depth--;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
is_object[depth] = false;
|
||||||
|
after_value = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Objects
|
||||||
|
case tape_type::START_OBJECT: {
|
||||||
|
// If we're too deep, we need to recurse to go deeper.
|
||||||
|
depth++;
|
||||||
|
if (simdjson_unlikely(depth >= MAX_DEPTH)) {
|
||||||
|
append(simdjson::dom::object(iter));
|
||||||
|
iter.json_index = iter.matching_brace_index() - 1; // Jump to the }
|
||||||
|
depth--;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output start {
|
||||||
|
format.start_object();
|
||||||
|
iter.json_index++;
|
||||||
|
|
||||||
|
// Handle empty {} (we don't want to come back around and print commas)
|
||||||
|
if (iter.tape_ref_type() == tape_type::END_OBJECT) {
|
||||||
|
format.end_object();
|
||||||
|
depth--;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
is_object[depth] = true;
|
||||||
|
after_value = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scalars
|
||||||
|
case tape_type::STRING:
|
||||||
|
format.string(iter.get_string_view());
|
||||||
|
break;
|
||||||
|
case tape_type::INT64:
|
||||||
|
format.number(iter.next_tape_value<int64_t>());
|
||||||
|
iter.json_index++; // numbers take up 2 spots, so we need to increment
|
||||||
|
// extra
|
||||||
|
break;
|
||||||
|
case tape_type::UINT64:
|
||||||
|
format.number(iter.next_tape_value<uint64_t>());
|
||||||
|
iter.json_index++; // numbers take up 2 spots, so we need to increment
|
||||||
|
// extra
|
||||||
|
break;
|
||||||
|
case tape_type::DOUBLE:
|
||||||
|
format.number(iter.next_tape_value<double>());
|
||||||
|
iter.json_index++; // numbers take up 2 spots, so we need to increment
|
||||||
|
// extra
|
||||||
|
break;
|
||||||
|
case tape_type::TRUE_VALUE:
|
||||||
|
format.true_atom();
|
||||||
|
break;
|
||||||
|
case tape_type::FALSE_VALUE:
|
||||||
|
format.false_atom();
|
||||||
|
break;
|
||||||
|
case tape_type::NULL_VALUE:
|
||||||
|
format.null_atom();
|
||||||
|
break;
|
||||||
|
|
||||||
|
// These are impossible
|
||||||
|
case tape_type::END_ARRAY:
|
||||||
|
case tape_type::END_OBJECT:
|
||||||
|
case tape_type::ROOT:
|
||||||
|
SIMDJSON_UNREACHABLE();
|
||||||
|
}
|
||||||
|
iter.json_index++;
|
||||||
|
after_value = true;
|
||||||
|
|
||||||
|
// Handle multiple ends in a row
|
||||||
|
while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY ||
|
||||||
|
iter.tape_ref_type() == tape_type::END_OBJECT)) {
|
||||||
|
if (iter.tape_ref_type() == tape_type::END_ARRAY) {
|
||||||
|
format.end_array();
|
||||||
|
} else {
|
||||||
|
format.end_object();
|
||||||
|
}
|
||||||
|
depth--;
|
||||||
|
iter.json_index++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop when we're at depth 0
|
||||||
|
} while (depth != 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class serializer>
|
||||||
|
inline void string_builder<serializer>::append(simdjson::dom::object value) {
|
||||||
|
format.start_object();
|
||||||
|
auto pair = value.begin();
|
||||||
|
auto end = value.end();
|
||||||
|
if (pair != end) {
|
||||||
|
append(*pair);
|
||||||
|
for (++pair; pair != end; ++pair) {
|
||||||
|
format.comma();
|
||||||
|
append(*pair);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
format.end_object();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class serializer>
|
||||||
|
inline void string_builder<serializer>::append(simdjson::dom::array value) {
|
||||||
|
format.start_array();
|
||||||
|
auto iter = value.begin();
|
||||||
|
auto end = value.end();
|
||||||
|
if (iter != end) {
|
||||||
|
append(*iter);
|
||||||
|
for (++iter; iter != end; ++iter) {
|
||||||
|
format.comma();
|
||||||
|
append(*iter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
format.end_array();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class serializer>
|
||||||
|
simdjson_really_inline void string_builder<serializer>::append(simdjson::dom::key_value_pair kv) {
|
||||||
|
format.key(kv.key);
|
||||||
|
append(kv.value);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class serializer>
|
||||||
|
simdjson_really_inline void string_builder<serializer>::clear() {
|
||||||
|
format.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class serializer>
|
||||||
|
simdjson_really_inline std::string_view string_builder<serializer>::str() const {
|
||||||
|
return format.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace internal
|
||||||
|
} // namespace simdjson
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,219 @@
|
||||||
|
#ifndef SIMDJSON_SERIALIZATION_H
|
||||||
|
#define SIMDJSON_SERIALIZATION_H
|
||||||
|
|
||||||
|
#include "simdjson/common_defs.h"
|
||||||
|
#include "simdjson/dom/document.h"
|
||||||
|
#include "simdjson/error.h"
|
||||||
|
#include "simdjson/internal/dom_parser_implementation.h"
|
||||||
|
#include "simdjson/internal/tape_ref.h"
|
||||||
|
#include "simdjson/padded_string.h"
|
||||||
|
#include "simdjson/portability.h"
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The string_builder template and mini_formatter class
|
||||||
|
* are not part of our public API and are subject to change
|
||||||
|
* at any time!
|
||||||
|
*/
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
class mini_formatter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @private The string_builder template allows us to construct
|
||||||
|
* a string from a document element. It is parametrized
|
||||||
|
* by a "formatter" which handles the details. Thus
|
||||||
|
* the string_builder template could support both minification
|
||||||
|
* and prettification, and various other tradeoffs.
|
||||||
|
*/
|
||||||
|
template <class formatter = mini_formatter>
|
||||||
|
class string_builder {
|
||||||
|
public:
|
||||||
|
/** Construct an initially empty builder, would print the empty string **/
|
||||||
|
string_builder() = default;
|
||||||
|
/** Append an element to the builder (to be printed) **/
|
||||||
|
inline void append(simdjson::dom::element value);
|
||||||
|
/** Append an array to the builder (to be printed) **/
|
||||||
|
inline void append(simdjson::dom::array value);
|
||||||
|
/** Append an objet to the builder (to be printed) **/
|
||||||
|
inline void append(simdjson::dom::object value);
|
||||||
|
/** Reset the builder (so that it would print the empty string) **/
|
||||||
|
simdjson_really_inline void clear();
|
||||||
|
/**
|
||||||
|
* Get access to the string. The string_view is owned by the builder
|
||||||
|
* and it is invalid to use it after the string_builder has been
|
||||||
|
* destroyed.
|
||||||
|
* However you can make a copy of the string_view on memory that you
|
||||||
|
* own.
|
||||||
|
*/
|
||||||
|
simdjson_really_inline std::string_view str() const;
|
||||||
|
/** Append a key_value_pair to the builder (to be printed) **/
|
||||||
|
simdjson_really_inline void append(simdjson::dom::key_value_pair value);
|
||||||
|
private:
|
||||||
|
formatter format{};
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @private This is the class that we expect to use with the string_builder
|
||||||
|
* template. It tries to produce a compact version of the JSON element
|
||||||
|
* as quickly as possible.
|
||||||
|
*/
|
||||||
|
class mini_formatter {
|
||||||
|
public:
|
||||||
|
mini_formatter() = default;
|
||||||
|
/** Add a comma **/
|
||||||
|
simdjson_really_inline void comma();
|
||||||
|
/** Start an array, prints [ **/
|
||||||
|
simdjson_really_inline void start_array();
|
||||||
|
/** End an array, prints ] **/
|
||||||
|
simdjson_really_inline void end_array();
|
||||||
|
/** Start an array, prints { **/
|
||||||
|
simdjson_really_inline void start_object();
|
||||||
|
/** Start an array, prints } **/
|
||||||
|
simdjson_really_inline void end_object();
|
||||||
|
/** Prints a true **/
|
||||||
|
simdjson_really_inline void true_atom();
|
||||||
|
/** Prints a false **/
|
||||||
|
simdjson_really_inline void false_atom();
|
||||||
|
/** Prints a null **/
|
||||||
|
simdjson_really_inline void null_atom();
|
||||||
|
/** Prints a number **/
|
||||||
|
simdjson_really_inline void number(int64_t x);
|
||||||
|
/** Prints a number **/
|
||||||
|
simdjson_really_inline void number(uint64_t x);
|
||||||
|
/** Prints a number **/
|
||||||
|
simdjson_really_inline void number(double x);
|
||||||
|
/** Prints a key (string + colon) **/
|
||||||
|
simdjson_really_inline void key(std::string_view unescaped);
|
||||||
|
/** Prints a string. The string is escaped as needed. **/
|
||||||
|
simdjson_really_inline void string(std::string_view unescaped);
|
||||||
|
/** Clears out the content. **/
|
||||||
|
simdjson_really_inline void clear();
|
||||||
|
/**
|
||||||
|
* Get access to the buffer, it is own by the instance, but
|
||||||
|
* the user can make a copy.
|
||||||
|
**/
|
||||||
|
simdjson_really_inline std::string_view str() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
// implementation details (subject to change)
|
||||||
|
/** Prints one character **/
|
||||||
|
simdjson_really_inline void one_char(char c);
|
||||||
|
/** Backing buffer **/
|
||||||
|
std::vector<char> buffer{}; // not ideal!
|
||||||
|
};
|
||||||
|
|
||||||
|
} // internal
|
||||||
|
|
||||||
|
namespace dom {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Print JSON to an output stream.
|
||||||
|
*
|
||||||
|
* @param out The output stream.
|
||||||
|
* @param value The element.
|
||||||
|
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
||||||
|
*/
|
||||||
|
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) {
|
||||||
|
simdjson::internal::string_builder<> sb;
|
||||||
|
sb.append(value);
|
||||||
|
return (out << sb.str());
|
||||||
|
}
|
||||||
|
#if SIMDJSON_EXCEPTIONS
|
||||||
|
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::element> x) {
|
||||||
|
if (x.error()) { throw simdjson::simdjson_error(x.error()); }
|
||||||
|
return (out << x.value());
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
/**
|
||||||
|
* Print JSON to an output stream.
|
||||||
|
*
|
||||||
|
* @param out The output stream.
|
||||||
|
* @param value The array.
|
||||||
|
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
||||||
|
*/
|
||||||
|
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) {
|
||||||
|
simdjson::internal::string_builder<> sb;
|
||||||
|
sb.append(value);
|
||||||
|
return (out << sb.str());
|
||||||
|
}
|
||||||
|
#if SIMDJSON_EXCEPTIONS
|
||||||
|
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::array> x) {
|
||||||
|
if (x.error()) { throw simdjson::simdjson_error(x.error()); }
|
||||||
|
return (out << x.value());
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
/**
|
||||||
|
* Print JSON to an output stream.
|
||||||
|
*
|
||||||
|
* @param out The output stream.
|
||||||
|
* @param value The objet.
|
||||||
|
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
||||||
|
*/
|
||||||
|
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) {
|
||||||
|
simdjson::internal::string_builder<> sb;
|
||||||
|
sb.append(value);
|
||||||
|
return (out << sb.str());
|
||||||
|
}
|
||||||
|
#if SIMDJSON_EXCEPTIONS
|
||||||
|
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::object> x) {
|
||||||
|
if (x.error()) { throw simdjson::simdjson_error(x.error()); }
|
||||||
|
return (out << x.value());
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
} // namespace dom
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts JSON to a string.
|
||||||
|
*
|
||||||
|
* dom::parser parser;
|
||||||
|
* element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded);
|
||||||
|
* cout << to_string(doc) << endl; // prints [1,2,3]
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template <class T>
|
||||||
|
std::string to_string(T x) {
|
||||||
|
// in C++, to_string is standard: http://www.cplusplus.com/reference/string/to_string/
|
||||||
|
// Currently minify and to_string are identical but in the future, they may
|
||||||
|
// differ.
|
||||||
|
simdjson::internal::string_builder<> sb;
|
||||||
|
sb.append(x);
|
||||||
|
std::string_view answer = sb.str();
|
||||||
|
return std::string(answer.data(), answer.size());
|
||||||
|
}
|
||||||
|
#if SIMDJSON_EXCEPTIONS
|
||||||
|
template <class T>
|
||||||
|
std::string to_string(simdjson_result<T> x) {
|
||||||
|
if (x.error()) { throw simdjson_error(x.error()); }
|
||||||
|
return to_string(x.value());
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
 * Minifies a JSON element or document, printing the smallest possible valid JSON.
 *
 *   dom::parser parser;
 *   element doc = parser.parse("   [ 1 , 2 , 3 ] "_padded);
 *   cout << minify(doc) << endl; // prints [1,2,3]
 *
 * Currently this simply forwards to to_string.
 */
template <class T>
std::string minify(T x) {
  return to_string(x);
}

#if SIMDJSON_EXCEPTIONS
/** Same as above for a result: throws simdjson_error when `x` holds an error. */
template <class T>
std::string minify(simdjson_result<T> x) {
  if (x.error()) {
    throw simdjson_error(x.error());
  }
  return to_string(x.value());
}
#endif
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace simdjson
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
|
@ -27,50 +27,6 @@ namespace simdjson {
|
||||||
*/
|
*/
|
||||||
SIMDJSON_WARN_UNUSED error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept;
|
SIMDJSON_WARN_UNUSED error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* Minifies a JSON element or document, printing the smallest possible valid JSON.
|
|
||||||
*
|
|
||||||
* dom::parser parser;
|
|
||||||
* element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded);
|
|
||||||
* cout << minify(doc) << endl; // prints [1,2,3]
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
template<typename T>
|
|
||||||
class minifier {
|
|
||||||
public:
|
|
||||||
/**
|
|
||||||
* Create a new minifier.
|
|
||||||
*
|
|
||||||
* @param _value The document or element to minify.
|
|
||||||
*/
|
|
||||||
inline minifier(const T &_value) noexcept : value{_value} {}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Minify JSON to a string.
|
|
||||||
*/
|
|
||||||
inline operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Minify JSON to an output stream.
|
|
||||||
*/
|
|
||||||
inline std::ostream& print(std::ostream& out);
|
|
||||||
private:
|
|
||||||
const T &value;
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename T>
|
|
||||||
inline minifier<T> minify(const T &value) noexcept { return minifier<T>(value); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Minify JSON to an output stream.
|
|
||||||
*
|
|
||||||
* @param out The output stream.
|
|
||||||
* @param formatter The minifier.
|
|
||||||
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
|
||||||
*/
|
|
||||||
template<typename T>
|
|
||||||
inline std::ostream& operator<<(std::ostream& out, minifier<T> formatter) { return formatter.print(out); }
|
|
||||||
|
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
#endif // SIMDJSON_MINIFY_H
|
#endif // SIMDJSON_MINIFY_H
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
SIMDJSON_PUSH_DISABLE_WARNINGS
|
SIMDJSON_PUSH_DISABLE_WARNINGS
|
||||||
SIMDJSON_DISABLE_UNDESIRED_WARNINGS
|
SIMDJSON_DISABLE_UNDESIRED_WARNINGS
|
||||||
|
#include "to_chars.cpp"
|
||||||
#include "error.cpp"
|
#include "error.cpp"
|
||||||
#include "implementation.cpp"
|
#include "implementation.cpp"
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,946 @@
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <array>
|
||||||
|
namespace simdjson {
|
||||||
|
namespace internal {
|
||||||
|
/*!
|
||||||
|
implements the Grisu2 algorithm for binary to decimal floating-point
|
||||||
|
conversion.
|
||||||
|
Adapted from JSON for Modern C++
|
||||||
|
|
||||||
|
This implementation is a slightly modified version of the reference
|
||||||
|
implementation which may be obtained from
|
||||||
|
http://florian.loitsch.com/publications (bench.tar.gz).
|
||||||
|
The code is distributed under the MIT license, Copyright (c) 2009 Florian
|
||||||
|
Loitsch. For a detailed description of the algorithm see: [1] Loitsch, "Printing
|
||||||
|
Floating-Point Numbers Quickly and Accurately with Integers", Proceedings of the
|
||||||
|
ACM SIGPLAN 2010 Conference on Programming Language Design and Implementation,
|
||||||
|
PLDI 2010 [2] Burger, Dybvig, "Printing Floating-Point Numbers Quickly and
|
||||||
|
Accurately", Proceedings of the ACM SIGPLAN 1996 Conference on Programming
|
||||||
|
Language Design and Implementation, PLDI 1996
|
||||||
|
*/
|
||||||
|
namespace dtoa_impl {
|
||||||
|
|
||||||
|
/**
 * Returns an object of type Target holding the same bytes as `source`.
 * Both types must have the same size (checked at compile time). The bytes
 * are copied with memcpy.
 */
template <typename Target, typename Source>
Target reinterpret_bits(const Source source) {
  static_assert(sizeof(Target) == sizeof(Source), "size mismatch");

  Target result;
  std::memcpy(&result, &source, sizeof(result));
  return result;
}
|
||||||
|
|
||||||
|
/**
 * A "do-it-yourself" floating-point number: the value f * 2^e with a 64-bit
 * significand `f` and a binary exponent `e`.
 */
struct diyfp // f * 2^e
{
  static constexpr int kPrecision = 64; // = q

  std::uint64_t f = 0;
  int e = 0;

  constexpr diyfp(std::uint64_t f_, int e_) noexcept : f(f_), e(e_) {}

  /*!
  @brief returns x - y
  @pre x.e == y.e and x.f >= y.f
  */
  static diyfp sub(const diyfp &x, const diyfp &y) noexcept {
    return diyfp(x.f - y.f, x.e);
  }

  /*!
  @brief returns x * y
  @note The result is rounded. (Only the upper q bits are returned.)
  */
  static diyfp mul(const diyfp &x, const diyfp &y) noexcept {
    static_assert(kPrecision == 64, "internal error");

    // Computes:
    //  f = round((x.f * y.f) / 2^q)
    //  e = x.e + y.e + q
    //
    // The 64-bit * 64-bit product is emulated with 32-bit halves:
    //
    //   x.f * y.f = lo_lo + 2^32 (lo_hi + hi_lo) + 2^64 hi_hi
    //
    // Only the upper 64 bits are kept. The carry into them comes from the
    // "middle" column: the upper half of lo_lo plus the lower halves of the
    // two cross products (the sum fits in 64 bits).
    const std::uint64_t mask32 = 0xFFFFFFFFu;

    const std::uint64_t x_lo = x.f & mask32;
    const std::uint64_t x_hi = x.f >> 32u;
    const std::uint64_t y_lo = y.f & mask32;
    const std::uint64_t y_hi = y.f >> 32u;

    const std::uint64_t lo_lo = x_lo * y_lo;
    const std::uint64_t lo_hi = x_lo * y_hi;
    const std::uint64_t hi_lo = x_hi * y_lo;
    const std::uint64_t hi_hi = x_hi * y_hi;

    std::uint64_t middle = (lo_lo >> 32u) + (lo_hi & mask32) + (hi_lo & mask32);

    // The full lower word is not required: adding 2^31 to the middle column
    // effectively adds the highest bit of the lower word to the upper word,
    // i.e. rounds with ties going up.
    middle += std::uint64_t{1} << (64u - 32u - 1u);

    const std::uint64_t upper =
        hi_hi + (hi_lo >> 32u) + (lo_hi >> 32u) + (middle >> 32u);

    return diyfp(upper, x.e + y.e + 64);
  }

  /*!
  @brief normalize x such that the significand is >= 2^(q-1)
  @pre x.f != 0
  */
  static diyfp normalize(diyfp x) noexcept {
    // Shift left until the top bit is set, adjusting the exponent each time
    // so that the represented value is unchanged.
    for (; (x.f >> 63u) == 0; x.e--) {
      x.f <<= 1u;
    }
    return x;
  }

  /*!
  @brief rescale x such that the result has the exponent target_exponent
  @pre x.e >= target_exponent, and the upper (x.e - target_exponent) bits of
       x.f must be zero (they would be shifted out).
  */
  static diyfp normalize_to(const diyfp &x,
                            const int target_exponent) noexcept {
    const int shift = x.e - target_exponent;
    return diyfp(x.f << shift, target_exponent);
  }
};

/**
 * A value `w` together with two neighbouring diyfp values, `minus` and `plus`.
 * NOTE(review): presumably the lower and upper rounding boundaries (m- and m+)
 * filled in by compute_boundaries; confirm against that function.
 */
struct boundaries {
  diyfp w;
  diyfp minus;
  diyfp plus;
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
Compute the (normalized) diyfp representing the input number 'value' and its
|
||||||
|
boundaries.
|
||||||
|
@pre value must be finite and positive
|
||||||
|
*/
|
||||||
|
template <typename FloatType> boundaries compute_boundaries(FloatType value) {
|
||||||
|
|
||||||
|
// Convert the IEEE representation into a diyfp.
|
||||||
|
//
|
||||||
|
// If v is denormal:
|
||||||
|
// value = 0.F * 2^(1 - bias) = ( F) * 2^(1 - bias - (p-1))
|
||||||
|
// If v is normalized:
|
||||||
|
// value = 1.F * 2^(E - bias) = (2^(p-1) + F) * 2^(E - bias - (p-1))
|
||||||
|
|
||||||
|
static_assert(std::numeric_limits<FloatType>::is_iec559,
|
||||||
|
"internal error: dtoa_short requires an IEEE-754 "
|
||||||
|
"floating-point implementation");
|
||||||
|
|
||||||
|
constexpr int kPrecision =
|
||||||
|
std::numeric_limits<FloatType>::digits; // = p (includes the hidden bit)
|
||||||
|
constexpr int kBias =
|
||||||
|
std::numeric_limits<FloatType>::max_exponent - 1 + (kPrecision - 1);
|
||||||
|
constexpr int kMinExp = 1 - kBias;
|
||||||
|
constexpr std::uint64_t kHiddenBit = std::uint64_t{1}
|
||||||
|
<< (kPrecision - 1); // = 2^(p-1)
|
||||||
|
|
||||||
|
using bits_type = typename std::conditional<kPrecision == 24, std::uint32_t,
|
||||||
|
std::uint64_t>::type;
|
||||||
|
|
||||||
|
const std::uint64_t bits = reinterpret_bits<bits_type>(value);
|
||||||
|
const std::uint64_t E = bits >> (kPrecision - 1);
|
||||||
|
const std::uint64_t F = bits & (kHiddenBit - 1);
|
||||||
|
|
||||||
|
const bool is_denormal = E == 0;
|
||||||
|
const diyfp v = is_denormal
|
||||||
|
? diyfp(F, kMinExp)
|
||||||
|
: diyfp(F + kHiddenBit, static_cast<int>(E) - kBias);
|
||||||
|
|
||||||
|
// Compute the boundaries m- and m+ of the floating-point value
|
||||||
|
// v = f * 2^e.
|
||||||
|
//
|
||||||
|
// Determine v- and v+, the floating-point predecessor and successor if v,
|
||||||
|
// respectively.
|
||||||
|
//
|
||||||
|
// v- = v - 2^e if f != 2^(p-1) or e == e_min (A)
|
||||||
|
// = v - 2^(e-1) if f == 2^(p-1) and e > e_min (B)
|
||||||
|
//
|
||||||
|
// v+ = v + 2^e
|
||||||
|
//
|
||||||
|
// Let m- = (v- + v) / 2 and m+ = (v + v+) / 2. All real numbers _strictly_
|
||||||
|
// between m- and m+ round to v, regardless of how the input rounding
|
||||||
|
// algorithm breaks ties.
|
||||||
|
//
|
||||||
|
// ---+-------------+-------------+-------------+-------------+--- (A)
|
||||||
|
// v- m- v m+ v+
|
||||||
|
//
|
||||||
|
// -----------------+------+------+-------------+-------------+--- (B)
|
||||||
|
// v- m- v m+ v+
|
||||||
|
|
||||||
|
const bool lower_boundary_is_closer = F == 0 && E > 1;
|
||||||
|
const diyfp m_plus = diyfp(2 * v.f + 1, v.e - 1);
|
||||||
|
const diyfp m_minus = lower_boundary_is_closer
|
||||||
|
? diyfp(4 * v.f - 1, v.e - 2) // (B)
|
||||||
|
: diyfp(2 * v.f - 1, v.e - 1); // (A)
|
||||||
|
|
||||||
|
// Determine the normalized w+ = m+.
|
||||||
|
const diyfp w_plus = diyfp::normalize(m_plus);
|
||||||
|
|
||||||
|
// Determine w- = m- such that e_(w-) = e_(w+).
|
||||||
|
const diyfp w_minus = diyfp::normalize_to(m_minus, w_plus.e);
|
||||||
|
|
||||||
|
return {diyfp::normalize(v), w_minus, w_plus};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Given normalized diyfp w, Grisu needs to find a (normalized) cached
|
||||||
|
// power-of-ten c, such that the exponent of the product c * w = f * 2^e lies
|
||||||
|
// within a certain range [alpha, gamma] (Definition 3.2 from [1])
|
||||||
|
//
|
||||||
|
// alpha <= e = e_c + e_w + q <= gamma
|
||||||
|
//
|
||||||
|
// or
|
||||||
|
//
|
||||||
|
// f_c * f_w * 2^alpha <= f_c 2^(e_c) * f_w 2^(e_w) * 2^q
|
||||||
|
// <= f_c * f_w * 2^gamma
|
||||||
|
//
|
||||||
|
// Since c and w are normalized, i.e. 2^(q-1) <= f < 2^q, this implies
|
||||||
|
//
|
||||||
|
// 2^(q-1) * 2^(q-1) * 2^alpha <= c * w * 2^q < 2^q * 2^q * 2^gamma
|
||||||
|
//
|
||||||
|
// or
|
||||||
|
//
|
||||||
|
// 2^(q - 2 + alpha) <= c * w < 2^(q + gamma)
|
||||||
|
//
|
||||||
|
// The choice of (alpha,gamma) determines the size of the table and the form of
|
||||||
|
// the digit generation procedure. Using (alpha,gamma)=(-60,-32) works out well
|
||||||
|
// in practice:
|
||||||
|
//
|
||||||
|
// The idea is to cut the number c * w = f * 2^e into two parts, which can be
|
||||||
|
// processed independently: An integral part p1, and a fractional part p2:
|
||||||
|
//
|
||||||
|
// f * 2^e = ( (f div 2^-e) * 2^-e + (f mod 2^-e) ) * 2^e
|
||||||
|
// = (f div 2^-e) + (f mod 2^-e) * 2^e
|
||||||
|
// = p1 + p2 * 2^e
|
||||||
|
//
|
||||||
|
// The conversion of p1 into decimal form requires a series of divisions and
|
||||||
|
// modulos by (a power of) 10. These operations are faster for 32-bit than for
|
||||||
|
// 64-bit integers, so p1 should ideally fit into a 32-bit integer. This can be
|
||||||
|
// achieved by choosing
|
||||||
|
//
|
||||||
|
// -e >= 32 or e <= -32 := gamma
|
||||||
|
//
|
||||||
|
// In order to convert the fractional part
|
||||||
|
//
|
||||||
|
// p2 * 2^e = p2 / 2^-e = d[-1] / 10^1 + d[-2] / 10^2 + ...
|
||||||
|
//
|
||||||
|
// into decimal form, the fraction is repeatedly multiplied by 10 and the digits
|
||||||
|
// d[-i] are extracted in order:
|
||||||
|
//
|
||||||
|
// (10 * p2) div 2^-e = d[-1]
|
||||||
|
// (10 * p2) mod 2^-e = d[-2] / 10^1 + ...
|
||||||
|
//
|
||||||
|
// The multiplication by 10 must not overflow. It is sufficient to choose
|
||||||
|
//
|
||||||
|
// 10 * p2 < 16 * p2 = 2^4 * p2 <= 2^64.
|
||||||
|
//
|
||||||
|
// Since p2 = f mod 2^-e < 2^-e,
|
||||||
|
//
|
||||||
|
// -e <= 60 or e >= -60 := alpha
|
||||||
|
|
||||||
|
// Bounds [alpha, gamma] for the binary exponent of the scaled product c * w.
// e >= alpha = -60 keeps 10 * p2 within 64 bits; e <= gamma = -32 makes the
// integral part p1 fit into 32 bits.
constexpr int kAlpha = -60;
constexpr int kGamma = -32;
|
||||||
|
|
||||||
|
// One entry of the cached powers-of-ten table: c = f * 2^e ~= 10^k.
struct cached_power {
  std::uint64_t f; // 64-bit significand of the approximation
  int e;           // binary exponent
  int k;           // decimal exponent being approximated
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
For a normalized diyfp w = f * 2^e, this function returns a (normalized) cached
|
||||||
|
power-of-ten c = f_c * 2^e_c, such that the exponent of the product w * c
|
||||||
|
satisfies (Definition 3.2 from [1])
|
||||||
|
alpha <= e_c + e + q <= gamma.
|
||||||
|
*/
|
||||||
|
inline cached_power get_cached_power_for_binary_exponent(int e) {
|
||||||
|
// Now
|
||||||
|
//
|
||||||
|
// alpha <= e_c + e + q <= gamma (1)
|
||||||
|
// ==> f_c * 2^alpha <= c * 2^e * 2^q
|
||||||
|
//
|
||||||
|
// and since the c's are normalized, 2^(q-1) <= f_c,
|
||||||
|
//
|
||||||
|
// ==> 2^(q - 1 + alpha) <= c * 2^(e + q)
|
||||||
|
// ==> 2^(alpha - e - 1) <= c
|
||||||
|
//
|
||||||
|
// If c were an exact power of ten, i.e. c = 10^k, one may determine k as
|
||||||
|
//
|
||||||
|
// k = ceil( log_10( 2^(alpha - e - 1) ) )
|
||||||
|
// = ceil( (alpha - e - 1) * log_10(2) )
|
||||||
|
//
|
||||||
|
// From the paper:
|
||||||
|
// "In theory the result of the procedure could be wrong since c is rounded,
|
||||||
|
// and the computation itself is approximated [...]. In practice, however,
|
||||||
|
// this simple function is sufficient."
|
||||||
|
//
|
||||||
|
// For IEEE double precision floating-point numbers converted into
|
||||||
|
// normalized diyfp's w = f * 2^e, with q = 64,
|
||||||
|
//
|
||||||
|
// e >= -1022 (min IEEE exponent)
|
||||||
|
// -52 (p - 1)
|
||||||
|
// -52 (p - 1, possibly normalize denormal IEEE numbers)
|
||||||
|
// -11 (normalize the diyfp)
|
||||||
|
// = -1137
|
||||||
|
//
|
||||||
|
// and
|
||||||
|
//
|
||||||
|
// e <= +1023 (max IEEE exponent)
|
||||||
|
// -52 (p - 1)
|
||||||
|
// -11 (normalize the diyfp)
|
||||||
|
// = 960
|
||||||
|
//
|
||||||
|
// This binary exponent range [-1137,960] results in a decimal exponent
|
||||||
|
// range [-307,324]. One does not need to store a cached power for each
|
||||||
|
// k in this range. For each such k it suffices to find a cached power
|
||||||
|
// such that the exponent of the product lies in [alpha,gamma].
|
||||||
|
// This implies that the difference of the decimal exponents of adjacent
|
||||||
|
// table entries must be less than or equal to
|
||||||
|
//
|
||||||
|
// floor( (gamma - alpha) * log_10(2) ) = 8.
|
||||||
|
//
|
||||||
|
// (A smaller distance gamma-alpha would require a larger table.)
|
||||||
|
|
||||||
|
// NB:
|
||||||
|
// Actually this function returns c, such that -60 <= e_c + e + 64 <= -34.
|
||||||
|
|
||||||
|
constexpr int kCachedPowersMinDecExp = -300;
|
||||||
|
constexpr int kCachedPowersDecStep = 8;
|
||||||
|
|
||||||
|
static constexpr std::array<cached_power, 79> kCachedPowers = {{
|
||||||
|
{0xAB70FE17C79AC6CA, -1060, -300}, {0xFF77B1FCBEBCDC4F, -1034, -292},
|
||||||
|
{0xBE5691EF416BD60C, -1007, -284}, {0x8DD01FAD907FFC3C, -980, -276},
|
||||||
|
{0xD3515C2831559A83, -954, -268}, {0x9D71AC8FADA6C9B5, -927, -260},
|
||||||
|
{0xEA9C227723EE8BCB, -901, -252}, {0xAECC49914078536D, -874, -244},
|
||||||
|
{0x823C12795DB6CE57, -847, -236}, {0xC21094364DFB5637, -821, -228},
|
||||||
|
{0x9096EA6F3848984F, -794, -220}, {0xD77485CB25823AC7, -768, -212},
|
||||||
|
{0xA086CFCD97BF97F4, -741, -204}, {0xEF340A98172AACE5, -715, -196},
|
||||||
|
{0xB23867FB2A35B28E, -688, -188}, {0x84C8D4DFD2C63F3B, -661, -180},
|
||||||
|
{0xC5DD44271AD3CDBA, -635, -172}, {0x936B9FCEBB25C996, -608, -164},
|
||||||
|
{0xDBAC6C247D62A584, -582, -156}, {0xA3AB66580D5FDAF6, -555, -148},
|
||||||
|
{0xF3E2F893DEC3F126, -529, -140}, {0xB5B5ADA8AAFF80B8, -502, -132},
|
||||||
|
{0x87625F056C7C4A8B, -475, -124}, {0xC9BCFF6034C13053, -449, -116},
|
||||||
|
{0x964E858C91BA2655, -422, -108}, {0xDFF9772470297EBD, -396, -100},
|
||||||
|
{0xA6DFBD9FB8E5B88F, -369, -92}, {0xF8A95FCF88747D94, -343, -84},
|
||||||
|
{0xB94470938FA89BCF, -316, -76}, {0x8A08F0F8BF0F156B, -289, -68},
|
||||||
|
{0xCDB02555653131B6, -263, -60}, {0x993FE2C6D07B7FAC, -236, -52},
|
||||||
|
{0xE45C10C42A2B3B06, -210, -44}, {0xAA242499697392D3, -183, -36},
|
||||||
|
{0xFD87B5F28300CA0E, -157, -28}, {0xBCE5086492111AEB, -130, -20},
|
||||||
|
{0x8CBCCC096F5088CC, -103, -12}, {0xD1B71758E219652C, -77, -4},
|
||||||
|
{0x9C40000000000000, -50, 4}, {0xE8D4A51000000000, -24, 12},
|
||||||
|
{0xAD78EBC5AC620000, 3, 20}, {0x813F3978F8940984, 30, 28},
|
||||||
|
{0xC097CE7BC90715B3, 56, 36}, {0x8F7E32CE7BEA5C70, 83, 44},
|
||||||
|
{0xD5D238A4ABE98068, 109, 52}, {0x9F4F2726179A2245, 136, 60},
|
||||||
|
{0xED63A231D4C4FB27, 162, 68}, {0xB0DE65388CC8ADA8, 189, 76},
|
||||||
|
{0x83C7088E1AAB65DB, 216, 84}, {0xC45D1DF942711D9A, 242, 92},
|
||||||
|
{0x924D692CA61BE758, 269, 100}, {0xDA01EE641A708DEA, 295, 108},
|
||||||
|
{0xA26DA3999AEF774A, 322, 116}, {0xF209787BB47D6B85, 348, 124},
|
||||||
|
{0xB454E4A179DD1877, 375, 132}, {0x865B86925B9BC5C2, 402, 140},
|
||||||
|
{0xC83553C5C8965D3D, 428, 148}, {0x952AB45CFA97A0B3, 455, 156},
|
||||||
|
{0xDE469FBD99A05FE3, 481, 164}, {0xA59BC234DB398C25, 508, 172},
|
||||||
|
{0xF6C69A72A3989F5C, 534, 180}, {0xB7DCBF5354E9BECE, 561, 188},
|
||||||
|
{0x88FCF317F22241E2, 588, 196}, {0xCC20CE9BD35C78A5, 614, 204},
|
||||||
|
{0x98165AF37B2153DF, 641, 212}, {0xE2A0B5DC971F303A, 667, 220},
|
||||||
|
{0xA8D9D1535CE3B396, 694, 228}, {0xFB9B7CD9A4A7443C, 720, 236},
|
||||||
|
{0xBB764C4CA7A44410, 747, 244}, {0x8BAB8EEFB6409C1A, 774, 252},
|
||||||
|
{0xD01FEF10A657842C, 800, 260}, {0x9B10A4E5E9913129, 827, 268},
|
||||||
|
{0xE7109BFBA19C0C9D, 853, 276}, {0xAC2820D9623BF429, 880, 284},
|
||||||
|
{0x80444B5E7AA7CF85, 907, 292}, {0xBF21E44003ACDD2D, 933, 300},
|
||||||
|
{0x8E679C2F5E44FF8F, 960, 308}, {0xD433179D9C8CB841, 986, 316},
|
||||||
|
{0x9E19DB92B4E31BA9, 1013, 324},
|
||||||
|
}};
|
||||||
|
|
||||||
|
// This computation gives exactly the same results for k as
|
||||||
|
// k = ceil((kAlpha - e - 1) * 0.30102999566398114)
|
||||||
|
// for |e| <= 1500, but doesn't require floating-point operations.
|
||||||
|
// NB: log_10(2) ~= 78913 / 2^18
|
||||||
|
const int f = kAlpha - e - 1;
|
||||||
|
const int k = (f * 78913) / (1 << 18) + static_cast<int>(f > 0);
|
||||||
|
|
||||||
|
const int index = (-kCachedPowersMinDecExp + k + (kCachedPowersDecStep - 1)) /
|
||||||
|
kCachedPowersDecStep;
|
||||||
|
|
||||||
|
const cached_power cached = kCachedPowers[static_cast<std::size_t>(index)];
|
||||||
|
|
||||||
|
return cached;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
Counts the decimal digits of n and reports the matching power of ten.

For n != 0, returns k, such that pow10 := 10^(k-1) <= n < 10^k.
For n == 0, returns 1 and sets pow10 := 1.
*/
inline int find_largest_pow10(const std::uint32_t n, std::uint32_t &pow10) {
  // kPowersOfTen[i] == 10^i; 10^9 is the largest power of ten in 32 bits.
  static constexpr std::uint32_t kPowersOfTen[10] = {
      1,      10,      100,      1000,      10000,
      100000, 1000000, 10000000, 100000000, 1000000000};

  // Walk down from ten digits until the candidate power no longer exceeds n.
  // The walk stops at one digit at the latest, which also covers n == 0.
  int digits = 10;
  while (digits > 1 && n < kPowersOfTen[digits - 1]) {
    --digits;
  }

  pow10 = kPowersOfTen[digits - 1];
  return digits;
}
|
||||||
|
|
||||||
|
// Nudges the last generated digit downwards while that moves the decimal
// value V = buf * 10^k closer to w without leaving [M-, M+].
//
//               <--------------------------- delta ---->
//                                  <---- dist --------->
// --------------[------------------+-------------------]--------------
//               M-                 w                   M+
//
//                                       <---- rest ---->
// --------------[------------------+----+--------------]--------------
//                                  w    V
//
// dist = M+ - w, delta = M+ - M-, rest = M+ - V, and ten_k is one unit in the
// last decimal place of buf; all four share the same implicit binary
// exponent. Only buf[len - 1] is ever modified.
inline void grisu2_round(char *buf, int len, std::uint64_t dist,
                         std::uint64_t delta, std::uint64_t rest,
                         std::uint64_t ten_k) {
  // The checks are kept in this order so that the unsigned subtractions
  // never wrap around.
  for (;;) {
    if (rest >= dist) {
      break; // V is already at or below w
    }
    if (delta - rest < ten_k) {
      break; // one more step would drop V below M-
    }
    const std::uint64_t stepped = rest + ten_k;
    if (stepped >= dist && dist - rest <= stepped - dist) {
      break; // the step would cross w and not end up any closer to it
    }
    buf[len - 1]--;
    rest = stepped;
  }
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
Generates V = buffer * 10^decimal_exponent, such that M- <= V <= M+.
|
||||||
|
M- and M+ must be normalized and share the same exponent -60 <= e <= -32.
|
||||||
|
*/
|
||||||
|
inline void grisu2_digit_gen(char *buffer, int &length, int &decimal_exponent,
|
||||||
|
diyfp M_minus, diyfp w, diyfp M_plus) {
|
||||||
|
static_assert(kAlpha >= -60, "internal error");
|
||||||
|
static_assert(kGamma <= -32, "internal error");
|
||||||
|
|
||||||
|
// Generates the digits (and the exponent) of a decimal floating-point
|
||||||
|
// number V = buffer * 10^decimal_exponent in the range [M-, M+]. The diyfp's
|
||||||
|
// w, M- and M+ share the same exponent e, which satisfies alpha <= e <=
|
||||||
|
// gamma.
|
||||||
|
//
|
||||||
|
// <--------------------------- delta ---->
|
||||||
|
// <---- dist --------->
|
||||||
|
// --------------[------------------+-------------------]--------------
|
||||||
|
// M- w M+
|
||||||
|
//
|
||||||
|
// Grisu2 generates the digits of M+ from left to right and stops as soon as
|
||||||
|
// V is in [M-,M+].
|
||||||
|
|
||||||
|
std::uint64_t delta =
|
||||||
|
diyfp::sub(M_plus, M_minus)
|
||||||
|
.f; // (significand of (M+ - M-), implicit exponent is e)
|
||||||
|
std::uint64_t dist =
|
||||||
|
diyfp::sub(M_plus, w)
|
||||||
|
.f; // (significand of (M+ - w ), implicit exponent is e)
|
||||||
|
|
||||||
|
// Split M+ = f * 2^e into two parts p1 and p2 (note: e < 0):
|
||||||
|
//
|
||||||
|
// M+ = f * 2^e
|
||||||
|
// = ((f div 2^-e) * 2^-e + (f mod 2^-e)) * 2^e
|
||||||
|
// = ((p1 ) * 2^-e + (p2 )) * 2^e
|
||||||
|
// = p1 + p2 * 2^e
|
||||||
|
|
||||||
|
const diyfp one(std::uint64_t{1} << -M_plus.e, M_plus.e);
|
||||||
|
|
||||||
|
auto p1 = static_cast<std::uint32_t>(
|
||||||
|
M_plus.f >>
|
||||||
|
-one.e); // p1 = f div 2^-e (Since -e >= 32, p1 fits into a 32-bit int.)
|
||||||
|
std::uint64_t p2 = M_plus.f & (one.f - 1); // p2 = f mod 2^-e
|
||||||
|
|
||||||
|
// 1)
|
||||||
|
//
|
||||||
|
// Generate the digits of the integral part p1 = d[n-1]...d[1]d[0]
|
||||||
|
|
||||||
|
std::uint32_t pow10;
|
||||||
|
const int k = find_largest_pow10(p1, pow10);
|
||||||
|
|
||||||
|
// 10^(k-1) <= p1 < 10^k, pow10 = 10^(k-1)
|
||||||
|
//
|
||||||
|
// p1 = (p1 div 10^(k-1)) * 10^(k-1) + (p1 mod 10^(k-1))
|
||||||
|
// = (d[k-1] ) * 10^(k-1) + (p1 mod 10^(k-1))
|
||||||
|
//
|
||||||
|
// M+ = p1 + p2 * 2^e
|
||||||
|
// = d[k-1] * 10^(k-1) + (p1 mod 10^(k-1)) + p2 * 2^e
|
||||||
|
// = d[k-1] * 10^(k-1) + ((p1 mod 10^(k-1)) * 2^-e + p2) * 2^e
|
||||||
|
// = d[k-1] * 10^(k-1) + ( rest) * 2^e
|
||||||
|
//
|
||||||
|
// Now generate the digits d[n] of p1 from left to right (n = k-1,...,0)
|
||||||
|
//
|
||||||
|
// p1 = d[k-1]...d[n] * 10^n + d[n-1]...d[0]
|
||||||
|
//
|
||||||
|
// but stop as soon as
|
||||||
|
//
|
||||||
|
// rest * 2^e = (d[n-1]...d[0] * 2^-e + p2) * 2^e <= delta * 2^e
|
||||||
|
|
||||||
|
int n = k;
|
||||||
|
while (n > 0) {
|
||||||
|
// Invariants:
|
||||||
|
// M+ = buffer * 10^n + (p1 + p2 * 2^e) (buffer = 0 for n = k)
|
||||||
|
// pow10 = 10^(n-1) <= p1 < 10^n
|
||||||
|
//
|
||||||
|
const std::uint32_t d = p1 / pow10; // d = p1 div 10^(n-1)
|
||||||
|
const std::uint32_t r = p1 % pow10; // r = p1 mod 10^(n-1)
|
||||||
|
//
|
||||||
|
// M+ = buffer * 10^n + (d * 10^(n-1) + r) + p2 * 2^e
|
||||||
|
// = (buffer * 10 + d) * 10^(n-1) + (r + p2 * 2^e)
|
||||||
|
//
|
||||||
|
buffer[length++] = static_cast<char>('0' + d); // buffer := buffer * 10 + d
|
||||||
|
//
|
||||||
|
// M+ = buffer * 10^(n-1) + (r + p2 * 2^e)
|
||||||
|
//
|
||||||
|
p1 = r;
|
||||||
|
n--;
|
||||||
|
//
|
||||||
|
// M+ = buffer * 10^n + (p1 + p2 * 2^e)
|
||||||
|
// pow10 = 10^n
|
||||||
|
//
|
||||||
|
|
||||||
|
// Now check if enough digits have been generated.
|
||||||
|
// Compute
|
||||||
|
//
|
||||||
|
// p1 + p2 * 2^e = (p1 * 2^-e + p2) * 2^e = rest * 2^e
|
||||||
|
//
|
||||||
|
// Note:
|
||||||
|
// Since rest and delta share the same exponent e, it suffices to
|
||||||
|
// compare the significands.
|
||||||
|
const std::uint64_t rest = (std::uint64_t{p1} << -one.e) + p2;
|
||||||
|
if (rest <= delta) {
|
||||||
|
// V = buffer * 10^n, with M- <= V <= M+.
|
||||||
|
|
||||||
|
decimal_exponent += n;
|
||||||
|
|
||||||
|
// We may now just stop. But instead look if the buffer could be
|
||||||
|
// decremented to bring V closer to w.
|
||||||
|
//
|
||||||
|
// pow10 = 10^n is now 1 ulp in the decimal representation V.
|
||||||
|
// The rounding procedure works with diyfp's with an implicit
|
||||||
|
// exponent of e.
|
||||||
|
//
|
||||||
|
// 10^n = (10^n * 2^-e) * 2^e = ulp * 2^e
|
||||||
|
//
|
||||||
|
const std::uint64_t ten_n = std::uint64_t{pow10} << -one.e;
|
||||||
|
grisu2_round(buffer, length, dist, delta, rest, ten_n);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
pow10 /= 10;
|
||||||
|
//
|
||||||
|
// pow10 = 10^(n-1) <= p1 < 10^n
|
||||||
|
// Invariants restored.
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2)
|
||||||
|
//
|
||||||
|
// The digits of the integral part have been generated:
|
||||||
|
//
|
||||||
|
// M+ = d[k-1]...d[1]d[0] + p2 * 2^e
|
||||||
|
// = buffer + p2 * 2^e
|
||||||
|
//
|
||||||
|
// Now generate the digits of the fractional part p2 * 2^e.
|
||||||
|
//
|
||||||
|
// Note:
|
||||||
|
// No decimal point is generated: the exponent is adjusted instead.
|
||||||
|
//
|
||||||
|
// p2 actually represents the fraction
|
||||||
|
//
|
||||||
|
// p2 * 2^e
|
||||||
|
// = p2 / 2^-e
|
||||||
|
// = d[-1] / 10^1 + d[-2] / 10^2 + ...
|
||||||
|
//
|
||||||
|
// Now generate the digits d[-m] of p1 from left to right (m = 1,2,...)
|
||||||
|
//
|
||||||
|
// p2 * 2^e = d[-1]d[-2]...d[-m] * 10^-m
|
||||||
|
// + 10^-m * (d[-m-1] / 10^1 + d[-m-2] / 10^2 + ...)
|
||||||
|
//
|
||||||
|
// using
|
||||||
|
//
|
||||||
|
// 10^m * p2 = ((10^m * p2) div 2^-e) * 2^-e + ((10^m * p2) mod 2^-e)
|
||||||
|
// = ( d) * 2^-e + ( r)
|
||||||
|
//
|
||||||
|
// or
|
||||||
|
// 10^m * p2 * 2^e = d + r * 2^e
|
||||||
|
//
|
||||||
|
// i.e.
|
||||||
|
//
|
||||||
|
// M+ = buffer + p2 * 2^e
|
||||||
|
// = buffer + 10^-m * (d + r * 2^e)
|
||||||
|
// = (buffer * 10^m + d) * 10^-m + 10^-m * r * 2^e
|
||||||
|
//
|
||||||
|
// and stop as soon as 10^-m * r * 2^e <= delta * 2^e
|
||||||
|
|
||||||
|
int m = 0;
|
||||||
|
for (;;) {
|
||||||
|
// Invariant:
|
||||||
|
// M+ = buffer * 10^-m + 10^-m * (d[-m-1] / 10 + d[-m-2] / 10^2 + ...)
|
||||||
|
// * 2^e
|
||||||
|
// = buffer * 10^-m + 10^-m * (p2 )
|
||||||
|
// * 2^e = buffer * 10^-m + 10^-m * (1/10 * (10 * p2) ) * 2^e =
|
||||||
|
// buffer * 10^-m + 10^-m * (1/10 * ((10*p2 div 2^-e) * 2^-e +
|
||||||
|
// (10*p2 mod 2^-e)) * 2^e
|
||||||
|
//
|
||||||
|
p2 *= 10;
|
||||||
|
const std::uint64_t d = p2 >> -one.e; // d = (10 * p2) div 2^-e
|
||||||
|
const std::uint64_t r = p2 & (one.f - 1); // r = (10 * p2) mod 2^-e
|
||||||
|
//
|
||||||
|
// M+ = buffer * 10^-m + 10^-m * (1/10 * (d * 2^-e + r) * 2^e
|
||||||
|
// = buffer * 10^-m + 10^-m * (1/10 * (d + r * 2^e))
|
||||||
|
// = (buffer * 10 + d) * 10^(-m-1) + 10^(-m-1) * r * 2^e
|
||||||
|
//
|
||||||
|
buffer[length++] = static_cast<char>('0' + d); // buffer := buffer * 10 + d
|
||||||
|
//
|
||||||
|
// M+ = buffer * 10^(-m-1) + 10^(-m-1) * r * 2^e
|
||||||
|
//
|
||||||
|
p2 = r;
|
||||||
|
m++;
|
||||||
|
//
|
||||||
|
// M+ = buffer * 10^-m + 10^-m * p2 * 2^e
|
||||||
|
// Invariant restored.
|
||||||
|
|
||||||
|
// Check if enough digits have been generated.
|
||||||
|
//
|
||||||
|
// 10^-m * p2 * 2^e <= delta * 2^e
|
||||||
|
// p2 * 2^e <= 10^m * delta * 2^e
|
||||||
|
// p2 <= 10^m * delta
|
||||||
|
delta *= 10;
|
||||||
|
dist *= 10;
|
||||||
|
if (p2 <= delta) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// V = buffer * 10^-m, with M- <= V <= M+.
|
||||||
|
|
||||||
|
decimal_exponent -= m;
|
||||||
|
|
||||||
|
// 1 ulp in the decimal representation is now 10^-m.
|
||||||
|
// Since delta and dist are now scaled by 10^m, we need to do the
|
||||||
|
// same with ulp in order to keep the units in sync.
|
||||||
|
//
|
||||||
|
// 10^m * 10^-m = 1 = 2^-e * 2^e = ten_m * 2^e
|
||||||
|
//
|
||||||
|
const std::uint64_t ten_m = one.f;
|
||||||
|
grisu2_round(buffer, length, dist, delta, p2, ten_m);
|
||||||
|
|
||||||
|
// By construction this algorithm generates the shortest possible decimal
|
||||||
|
// number (Loitsch, Theorem 6.2) which rounds back to w.
|
||||||
|
// For an input number of precision p, at least
|
||||||
|
//
|
||||||
|
// N = 1 + ceil(p * log_10(2))
|
||||||
|
//
|
||||||
|
// decimal digits are sufficient to identify all binary floating-point
|
||||||
|
// numbers (Matula, "In-and-Out conversions").
|
||||||
|
// This implies that the algorithm does not produce more than N decimal
|
||||||
|
// digits.
|
||||||
|
//
|
||||||
|
// N = 17 for p = 53 (IEEE double precision)
|
||||||
|
// N = 9 for p = 24 (IEEE single precision)
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
v = buf * 10^decimal_exponent
|
||||||
|
len is the length of the buffer (number of decimal digits)
|
||||||
|
The buffer must be large enough, i.e. >= max_digits10.
|
||||||
|
*/
|
||||||
|
inline void grisu2(char *buf, int &len, int &decimal_exponent, diyfp m_minus,
|
||||||
|
diyfp v, diyfp m_plus) {
|
||||||
|
|
||||||
|
// --------(-----------------------+-----------------------)-------- (A)
|
||||||
|
// m- v m+
|
||||||
|
//
|
||||||
|
// --------------------(-----------+-----------------------)-------- (B)
|
||||||
|
// m- v m+
|
||||||
|
//
|
||||||
|
// First scale v (and m- and m+) such that the exponent is in the range
|
||||||
|
// [alpha, gamma].
|
||||||
|
|
||||||
|
const cached_power cached = get_cached_power_for_binary_exponent(m_plus.e);
|
||||||
|
|
||||||
|
const diyfp c_minus_k(cached.f, cached.e); // = c ~= 10^-k
|
||||||
|
|
||||||
|
// The exponent of the products is = v.e + c_minus_k.e + q and is in the range
|
||||||
|
// [alpha,gamma]
|
||||||
|
const diyfp w = diyfp::mul(v, c_minus_k);
|
||||||
|
const diyfp w_minus = diyfp::mul(m_minus, c_minus_k);
|
||||||
|
const diyfp w_plus = diyfp::mul(m_plus, c_minus_k);
|
||||||
|
|
||||||
|
// ----(---+---)---------------(---+---)---------------(---+---)----
|
||||||
|
// w- w w+
|
||||||
|
// = c*m- = c*v = c*m+
|
||||||
|
//
|
||||||
|
// diyfp::mul rounds its result and c_minus_k is approximated too. w, w- and
|
||||||
|
// w+ are now off by a small amount.
|
||||||
|
// In fact:
|
||||||
|
//
|
||||||
|
// w - v * 10^k < 1 ulp
|
||||||
|
//
|
||||||
|
// To account for this inaccuracy, add resp. subtract 1 ulp.
|
||||||
|
//
|
||||||
|
// --------+---[---------------(---+---)---------------]---+--------
|
||||||
|
// w- M- w M+ w+
|
||||||
|
//
|
||||||
|
// Now any number in [M-, M+] (bounds included) will round to w when input,
|
||||||
|
// regardless of how the input rounding algorithm breaks ties.
|
||||||
|
//
|
||||||
|
// And digit_gen generates the shortest possible such number in [M-, M+].
|
||||||
|
// Note that this does not mean that Grisu2 always generates the shortest
|
||||||
|
// possible number in the interval (m-, m+).
|
||||||
|
const diyfp M_minus(w_minus.f + 1, w_minus.e);
|
||||||
|
const diyfp M_plus(w_plus.f - 1, w_plus.e);
|
||||||
|
|
||||||
|
decimal_exponent = -cached.k; // = -(-k) = k
|
||||||
|
|
||||||
|
grisu2_digit_gen(buf, len, decimal_exponent, M_minus, w, M_plus);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
v = buf * 10^decimal_exponent
|
||||||
|
len is the length of the buffer (number of decimal digits)
|
||||||
|
The buffer must be large enough, i.e. >= max_digits10.
|
||||||
|
*/
|
||||||
|
template <typename FloatType>
|
||||||
|
void grisu2(char *buf, int &len, int &decimal_exponent, FloatType value) {
|
||||||
|
static_assert(diyfp::kPrecision >= std::numeric_limits<FloatType>::digits + 3,
|
||||||
|
"internal error: not enough precision");
|
||||||
|
|
||||||
|
// If the neighbors (and boundaries) of 'value' are always computed for
|
||||||
|
// double-precision numbers, all float's can be recovered using strtod (and
|
||||||
|
// strtof). However, the resulting decimal representations are not exactly
|
||||||
|
// "short".
|
||||||
|
//
|
||||||
|
// The documentation for 'std::to_chars'
|
||||||
|
// (https://en.cppreference.com/w/cpp/utility/to_chars) says "value is
|
||||||
|
// converted to a string as if by std::sprintf in the default ("C") locale"
|
||||||
|
// and since sprintf promotes float's to double's, I think this is exactly
|
||||||
|
// what 'std::to_chars' does. On the other hand, the documentation for
|
||||||
|
// 'std::to_chars' requires that "parsing the representation using the
|
||||||
|
// corresponding std::from_chars function recovers value exactly". That
|
||||||
|
// indicates that single precision floating-point numbers should be recovered
|
||||||
|
// using 'std::strtof'.
|
||||||
|
//
|
||||||
|
// NB: If the neighbors are computed for single-precision numbers, there is a
|
||||||
|
// single float
|
||||||
|
// (7.0385307e-26f) which can't be recovered using strtod. The resulting
|
||||||
|
// double precision value is off by 1 ulp.
|
||||||
|
#if 0
|
||||||
|
const boundaries w = compute_boundaries(static_cast<double>(value));
|
||||||
|
#else
|
||||||
|
const boundaries w = compute_boundaries(value);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
grisu2(buf, len, decimal_exponent, w.minus, w.w, w.plus);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
@brief writes the sign and the decimal digits of the exponent e to buf
@return a pointer to the element following the exponent.
@pre -1000 < e < 1000

The sign ('+' or '-') is always written, followed by two digits, or three
when |e| >= 100.
*/
inline char *append_exponent(char *buf, int e) {
  const bool negative = e < 0;
  *buf++ = negative ? '-' : '+';

  const auto magnitude = static_cast<std::uint32_t>(negative ? -e : e);
  const std::uint32_t hundreds = magnitude / 100;
  const std::uint32_t tens = (magnitude / 10) % 10;
  const std::uint32_t ones = magnitude % 10;

  if (hundreds != 0) {
    *buf++ = static_cast<char>('0' + hundreds);
  }
  // At least two digits are always printed (a leading '0' if need be).
  // This is for compatibility with printf("%g").
  *buf++ = static_cast<char>('0' + tens);
  *buf++ = static_cast<char>('0' + ones);

  return buf;
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
@brief prettify v = buf * 10^decimal_exponent
|
||||||
|
If v is in the range [10^min_exp, 10^max_exp) it will be printed in fixed-point
|
||||||
|
notation. Otherwise it will be printed in exponential notation.
|
||||||
|
@pre min_exp < 0
|
||||||
|
@pre max_exp > 0
|
||||||
|
*/
|
||||||
|
inline char *format_buffer(char *buf, int len, int decimal_exponent,
|
||||||
|
int min_exp, int max_exp) {
|
||||||
|
|
||||||
|
const int k = len;
|
||||||
|
const int n = len + decimal_exponent;
|
||||||
|
|
||||||
|
// v = buf * 10^(n-k)
|
||||||
|
// k is the length of the buffer (number of decimal digits)
|
||||||
|
// n is the position of the decimal point relative to the start of the buffer.
|
||||||
|
|
||||||
|
if (k <= n && n <= max_exp) {
|
||||||
|
// digits[000]
|
||||||
|
// len <= max_exp + 2
|
||||||
|
|
||||||
|
std::memset(buf + k, '0', static_cast<size_t>(n) - static_cast<size_t>(k));
|
||||||
|
// Make it look like a floating-point number (#362, #378)
|
||||||
|
buf[n + 0] = '.';
|
||||||
|
buf[n + 1] = '0';
|
||||||
|
return buf + (static_cast<size_t>(n) + 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 < n && n <= max_exp) {
|
||||||
|
// dig.its
|
||||||
|
// len <= max_digits10 + 1
|
||||||
|
std::memmove(buf + (static_cast<size_t>(n) + 1), buf + n,
|
||||||
|
static_cast<size_t>(k) - static_cast<size_t>(n));
|
||||||
|
buf[n] = '.';
|
||||||
|
return buf + (static_cast<size_t>(k) + 1U);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (min_exp < n && n <= 0) {
|
||||||
|
// 0.[000]digits
|
||||||
|
// len <= 2 + (-min_exp - 1) + max_digits10
|
||||||
|
|
||||||
|
std::memmove(buf + (2 + static_cast<size_t>(-n)), buf,
|
||||||
|
static_cast<size_t>(k));
|
||||||
|
buf[0] = '0';
|
||||||
|
buf[1] = '.';
|
||||||
|
std::memset(buf + 2, '0', static_cast<size_t>(-n));
|
||||||
|
return buf + (2U + static_cast<size_t>(-n) + static_cast<size_t>(k));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (k == 1) {
|
||||||
|
// dE+123
|
||||||
|
// len <= 1 + 5
|
||||||
|
|
||||||
|
buf += 1;
|
||||||
|
} else {
|
||||||
|
// d.igitsE+123
|
||||||
|
// len <= max_digits10 + 1 + 5
|
||||||
|
|
||||||
|
std::memmove(buf + 2, buf + 1, static_cast<size_t>(k) - 1);
|
||||||
|
buf[1] = '.';
|
||||||
|
buf += 1 + static_cast<size_t>(k);
|
||||||
|
}
|
||||||
|
|
||||||
|
*buf++ = 'e';
|
||||||
|
return append_exponent(buf, n - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace dtoa_impl
|
||||||
|
|
||||||
|
/*!
|
||||||
|
The format of the resulting decimal representation is similar to printf's %g
|
||||||
|
format. Returns an iterator pointing past-the-end of the decimal representation.
|
||||||
|
@note The input number must be finite, i.e. NaN's and Inf's are not supported.
|
||||||
|
@note The buffer must be large enough.
|
||||||
|
@note The result is NOT null-terminated.
|
||||||
|
*/
|
||||||
|
char *to_chars(char *first, const char *last, double value) {
|
||||||
|
static_cast<void>(last); // maybe unused - fix warning
|
||||||
|
// Use signbit(value) instead of (value < 0) since signbit works for -0.
|
||||||
|
if (std::signbit(value)) {
|
||||||
|
value = -value;
|
||||||
|
*first++ = '-';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (value == 0) // +-0
|
||||||
|
{
|
||||||
|
*first++ = '0';
|
||||||
|
// Make it look like a floating-point number (#362, #378)
|
||||||
|
*first++ = '.';
|
||||||
|
*first++ = '0';
|
||||||
|
return first;
|
||||||
|
}
|
||||||
|
// Compute v = buffer * 10^decimal_exponent.
|
||||||
|
// The decimal digits are stored in the buffer, which needs to be interpreted
|
||||||
|
// as an unsigned decimal integer.
|
||||||
|
// len is the length of the buffer, i.e. the number of decimal digits.
|
||||||
|
int len = 0;
|
||||||
|
int decimal_exponent = 0;
|
||||||
|
dtoa_impl::grisu2(first, len, decimal_exponent, value);
|
||||||
|
// Format the buffer like printf("%.*g", prec, value)
|
||||||
|
constexpr int kMinExp = -4;
|
||||||
|
constexpr int kMaxExp = std::numeric_limits<double>::digits10;
|
||||||
|
|
||||||
|
return dtoa_impl::format_buffer(first, len, decimal_exponent, kMinExp,
|
||||||
|
kMaxExp);
|
||||||
|
}
|
||||||
|
} // namespace internal
|
||||||
|
} // namespace simdjson
|
|
@ -52,6 +52,7 @@ target_compile_definitions(stringparsingcheck PRIVATE NOMINMAX)
|
||||||
# All remaining tests link with simdjson proper
|
# All remaining tests link with simdjson proper
|
||||||
link_libraries(simdjson)
|
link_libraries(simdjson)
|
||||||
add_cpp_test(basictests LABELS acceptance per_implementation)
|
add_cpp_test(basictests LABELS acceptance per_implementation)
|
||||||
|
add_cpp_test(minify_tests LABELS acceptance per_implementation)
|
||||||
add_cpp_test(document_stream_tests LABELS acceptance per_implementation)
|
add_cpp_test(document_stream_tests LABELS acceptance per_implementation)
|
||||||
add_cpp_test(document_tests LABELS acceptance per_implementation)
|
add_cpp_test(document_tests LABELS acceptance per_implementation)
|
||||||
add_cpp_test(errortests LABELS acceptance per_implementation)
|
add_cpp_test(errortests LABELS acceptance per_implementation)
|
||||||
|
|
|
@ -1365,8 +1365,8 @@ namespace minify_tests {
|
||||||
|
|
||||||
bool test_minify() {
|
bool test_minify() {
|
||||||
std::cout << "Running " << __func__ << std::endl;
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
const std::string test = R"({ "foo" : 1, "bar" : [ 1, 2, 3 ], "baz": { "a": 1, "b": 2, "c": 3 } })";
|
const std::string test = R"({ "foo" : 1, "bar" : [ 1, 2, 0.11111111111111113 ], "baz": { "a": 3.1415926535897936, "b": 2, "c": 3.141592653589794 } })";
|
||||||
const std::string minified(R"({"foo":1,"bar":[1,2,3],"baz":{"a":1,"b":2,"c":3}})");
|
const std::string minified(R"({"foo":1,"bar":[1,2,0.11111111111111113],"baz":{"a":3.1415926535897936,"b":2,"c":3.141592653589794}})");
|
||||||
return check_minification(test.c_str(), test.size(), minified.c_str(), minified.size());
|
return check_minification(test.c_str(), test.size(), minified.c_str(), minified.size());
|
||||||
}
|
}
|
||||||
bool test_minify_array() {
|
bool test_minify_array() {
|
||||||
|
@ -1394,8 +1394,8 @@ namespace format_tests {
|
||||||
using namespace simdjson;
|
using namespace simdjson;
|
||||||
using namespace simdjson::dom;
|
using namespace simdjson::dom;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
const padded_string DOCUMENT = R"({ "foo" : 1, "bar" : [ 1, 2, 3 ], "baz": { "a": 1, "b": 2, "c": 3 } })"_padded;
|
const padded_string DOCUMENT = R"({ "foo" : 1, "bar" : [ 1, 2, 0.11111111111111113 ], "baz": { "a": 3.1415926535897936, "b": 2, "c": 3.141592653589794 } })"_padded;
|
||||||
const string MINIFIED(R"({"foo":1,"bar":[1,2,3],"baz":{"a":1,"b":2,"c":3}})");
|
const string MINIFIED(R"({"foo":1,"bar":[1,2,0.11111111111111113],"baz":{"a":3.1415926535897936,"b":2,"c":3.141592653589794}})");
|
||||||
bool assert_minified(ostringstream &actual, const std::string &expected=MINIFIED) {
|
bool assert_minified(ostringstream &actual, const std::string &expected=MINIFIED) {
|
||||||
if (actual.str() != expected) {
|
if (actual.str() != expected) {
|
||||||
cerr << "Failed to correctly minify " << DOCUMENT << endl;
|
cerr << "Failed to correctly minify " << DOCUMENT << endl;
|
||||||
|
@ -1451,7 +1451,7 @@ namespace format_tests {
|
||||||
ASSERT_SUCCESS( parser.parse(DOCUMENT)["bar"].get(array) );
|
ASSERT_SUCCESS( parser.parse(DOCUMENT)["bar"].get(array) );
|
||||||
ostringstream s;
|
ostringstream s;
|
||||||
s << array;
|
s << array;
|
||||||
return assert_minified(s, "[1,2,3]");
|
return assert_minified(s, "[1,2,0.11111111111111113]");
|
||||||
}
|
}
|
||||||
bool print_minify_array() {
|
bool print_minify_array() {
|
||||||
std::cout << "Running " << __func__ << std::endl;
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
@ -1460,7 +1460,7 @@ namespace format_tests {
|
||||||
ASSERT_SUCCESS( parser.parse(DOCUMENT)["bar"].get(array) );
|
ASSERT_SUCCESS( parser.parse(DOCUMENT)["bar"].get(array) );
|
||||||
ostringstream s;
|
ostringstream s;
|
||||||
s << minify(array);
|
s << minify(array);
|
||||||
return assert_minified(s, "[1,2,3]");
|
return assert_minified(s, "[1,2,0.11111111111111113]");
|
||||||
}
|
}
|
||||||
|
|
||||||
bool print_object() {
|
bool print_object() {
|
||||||
|
@ -1470,7 +1470,7 @@ namespace format_tests {
|
||||||
ASSERT_SUCCESS( parser.parse(DOCUMENT)["baz"].get(object) );
|
ASSERT_SUCCESS( parser.parse(DOCUMENT)["baz"].get(object) );
|
||||||
ostringstream s;
|
ostringstream s;
|
||||||
s << object;
|
s << object;
|
||||||
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
|
return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
|
||||||
}
|
}
|
||||||
bool print_minify_object() {
|
bool print_minify_object() {
|
||||||
std::cout << "Running " << __func__ << std::endl;
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
@ -1479,7 +1479,7 @@ namespace format_tests {
|
||||||
ASSERT_SUCCESS( parser.parse(DOCUMENT)["baz"].get(object) );
|
ASSERT_SUCCESS( parser.parse(DOCUMENT)["baz"].get(object) );
|
||||||
ostringstream s;
|
ostringstream s;
|
||||||
s << minify(object);
|
s << minify(object);
|
||||||
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
|
return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
|
||||||
}
|
}
|
||||||
|
|
||||||
#if SIMDJSON_EXCEPTIONS
|
#if SIMDJSON_EXCEPTIONS
|
||||||
|
@ -1536,14 +1536,14 @@ namespace format_tests {
|
||||||
dom::parser parser;
|
dom::parser parser;
|
||||||
ostringstream s;
|
ostringstream s;
|
||||||
s << parser.parse(DOCUMENT)["bar"].get<dom::array>();
|
s << parser.parse(DOCUMENT)["bar"].get<dom::array>();
|
||||||
return assert_minified(s, "[1,2,3]");
|
return assert_minified(s, "[1,2,0.11111111111111113]");
|
||||||
}
|
}
|
||||||
bool print_minify_array_result_exception() {
|
bool print_minify_array_result_exception() {
|
||||||
std::cout << "Running " << __func__ << std::endl;
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
dom::parser parser;
|
dom::parser parser;
|
||||||
ostringstream s;
|
ostringstream s;
|
||||||
s << minify(parser.parse(DOCUMENT)["bar"].get<dom::array>());
|
s << minify(parser.parse(DOCUMENT)["bar"].get<dom::array>());
|
||||||
return assert_minified(s, "[1,2,3]");
|
return assert_minified(s, "[1,2,0.11111111111111113]");
|
||||||
}
|
}
|
||||||
|
|
||||||
bool print_object_result_exception() {
|
bool print_object_result_exception() {
|
||||||
|
@ -1551,14 +1551,14 @@ namespace format_tests {
|
||||||
dom::parser parser;
|
dom::parser parser;
|
||||||
ostringstream s;
|
ostringstream s;
|
||||||
s << parser.parse(DOCUMENT)["baz"].get<dom::object>();
|
s << parser.parse(DOCUMENT)["baz"].get<dom::object>();
|
||||||
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
|
return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
|
||||||
}
|
}
|
||||||
bool print_minify_object_result_exception() {
|
bool print_minify_object_result_exception() {
|
||||||
std::cout << "Running " << __func__ << std::endl;
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
dom::parser parser;
|
dom::parser parser;
|
||||||
ostringstream s;
|
ostringstream s;
|
||||||
s << minify(parser.parse(DOCUMENT)["baz"].get<dom::object>());
|
s << minify(parser.parse(DOCUMENT)["baz"].get<dom::object>());
|
||||||
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
|
return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
|
||||||
}
|
}
|
||||||
|
|
||||||
bool print_array_exception() {
|
bool print_array_exception() {
|
||||||
|
@ -1567,7 +1567,7 @@ namespace format_tests {
|
||||||
dom::array array = parser.parse(DOCUMENT)["bar"];
|
dom::array array = parser.parse(DOCUMENT)["bar"];
|
||||||
ostringstream s;
|
ostringstream s;
|
||||||
s << array;
|
s << array;
|
||||||
return assert_minified(s, "[1,2,3]");
|
return assert_minified(s, "[1,2,0.11111111111111113]");
|
||||||
}
|
}
|
||||||
bool print_minify_array_exception() {
|
bool print_minify_array_exception() {
|
||||||
std::cout << "Running " << __func__ << std::endl;
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
@ -1575,7 +1575,7 @@ namespace format_tests {
|
||||||
dom::array array = parser.parse(DOCUMENT)["bar"];
|
dom::array array = parser.parse(DOCUMENT)["bar"];
|
||||||
ostringstream s;
|
ostringstream s;
|
||||||
s << minify(array);
|
s << minify(array);
|
||||||
return assert_minified(s, "[1,2,3]");
|
return assert_minified(s, "[1,2,0.11111111111111113]");
|
||||||
}
|
}
|
||||||
|
|
||||||
bool print_object_exception() {
|
bool print_object_exception() {
|
||||||
|
@ -1584,7 +1584,7 @@ namespace format_tests {
|
||||||
dom::object object = parser.parse(DOCUMENT)["baz"];
|
dom::object object = parser.parse(DOCUMENT)["baz"];
|
||||||
ostringstream s;
|
ostringstream s;
|
||||||
s << object;
|
s << object;
|
||||||
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
|
return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
|
||||||
}
|
}
|
||||||
bool print_minify_object_exception() {
|
bool print_minify_object_exception() {
|
||||||
std::cout << "Running " << __func__ << std::endl;
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
@ -1592,7 +1592,7 @@ namespace format_tests {
|
||||||
dom::object object = parser.parse(DOCUMENT)["baz"];
|
dom::object object = parser.parse(DOCUMENT)["baz"];
|
||||||
ostringstream s;
|
ostringstream s;
|
||||||
s << minify(object);
|
s << minify(object);
|
||||||
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
|
return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
|
||||||
}
|
}
|
||||||
#endif // SIMDJSON_EXCEPTIONS
|
#endif // SIMDJSON_EXCEPTIONS
|
||||||
|
|
||||||
|
@ -1615,6 +1615,149 @@ namespace format_tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
namespace to_string_tests {
|
||||||
|
using namespace simdjson;
|
||||||
|
using namespace simdjson::dom;
|
||||||
|
using namespace std;
|
||||||
|
const padded_string DOCUMENT = R"({ "foo" : 1, "bar" : [ 1, 2, 0.11111111111111113 ], "baz": { "a": 3.1415926535897936, "b": 2, "c": 3.141592653589794 } })"_padded;
|
||||||
|
const string MINIFIED(R"({"foo":1,"bar":[1,2,0.11111111111111113],"baz":{"a":3.1415926535897936,"b":2,"c":3.141592653589794}})");
|
||||||
|
bool assert_minified(ostringstream &actual, const std::string &expected=MINIFIED) {
|
||||||
|
if (actual.str() != expected) {
|
||||||
|
cerr << "Failed to correctly to_string " << DOCUMENT << endl;
|
||||||
|
cerr << "Expected: " << expected << endl;
|
||||||
|
cerr << "Actual: " << actual.str() << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool print_to_string_parser_parse() {
|
||||||
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
dom::parser parser;
|
||||||
|
dom::element doc;
|
||||||
|
ASSERT_SUCCESS( parser.parse(DOCUMENT).get(doc) );
|
||||||
|
ostringstream s;
|
||||||
|
s << to_string(doc);
|
||||||
|
return assert_minified(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool print_to_string_element() {
|
||||||
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
dom::parser parser;
|
||||||
|
dom::element value;
|
||||||
|
ASSERT_SUCCESS( parser.parse(DOCUMENT)["foo"].get(value) );
|
||||||
|
ostringstream s;
|
||||||
|
s << to_string(value);
|
||||||
|
return assert_minified(s, "1");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool print_to_string_array() {
|
||||||
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
dom::parser parser;
|
||||||
|
dom::array array;
|
||||||
|
ASSERT_SUCCESS( parser.parse(DOCUMENT)["bar"].get(array) );
|
||||||
|
ostringstream s;
|
||||||
|
s << to_string(array);
|
||||||
|
return assert_minified(s, "[1,2,0.11111111111111113]");
|
||||||
|
}
|
||||||
|
|
||||||
|
bool print_to_string_object() {
|
||||||
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
dom::parser parser;
|
||||||
|
dom::object object;
|
||||||
|
ASSERT_SUCCESS( parser.parse(DOCUMENT)["baz"].get(object) );
|
||||||
|
ostringstream s;
|
||||||
|
s << to_string(object);
|
||||||
|
return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
|
||||||
|
}
|
||||||
|
|
||||||
|
#if SIMDJSON_EXCEPTIONS
|
||||||
|
|
||||||
|
bool print_to_string_parser_parse_exception() {
|
||||||
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
dom::parser parser;
|
||||||
|
ostringstream s;
|
||||||
|
s << to_string(parser.parse(DOCUMENT));
|
||||||
|
return assert_minified(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool print_to_string_element_result_exception() {
|
||||||
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
dom::parser parser;
|
||||||
|
ostringstream s;
|
||||||
|
s << to_string(parser.parse(DOCUMENT)["foo"]);
|
||||||
|
return assert_minified(s, "1");
|
||||||
|
}
|
||||||
|
|
||||||
|
bool print_to_string_element_exception() {
|
||||||
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
dom::parser parser;
|
||||||
|
element value = parser.parse(DOCUMENT)["foo"];
|
||||||
|
ostringstream s;
|
||||||
|
s << to_string(value);
|
||||||
|
return assert_minified(s, "1");
|
||||||
|
}
|
||||||
|
|
||||||
|
bool print_to_string_array_result_exception() {
|
||||||
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
dom::parser parser;
|
||||||
|
ostringstream s;
|
||||||
|
s << to_string(parser.parse(DOCUMENT)["bar"].get<dom::array>());
|
||||||
|
return assert_minified(s, "[1,2,0.11111111111111113]");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool print_to_string_object_result_exception() {
|
||||||
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
dom::parser parser;
|
||||||
|
ostringstream s;
|
||||||
|
s << to_string(parser.parse(DOCUMENT)["baz"].get<dom::object>());
|
||||||
|
return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool print_to_string_array_exception() {
|
||||||
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
dom::parser parser;
|
||||||
|
dom::array array = parser.parse(DOCUMENT)["bar"];
|
||||||
|
ostringstream s;
|
||||||
|
s << to_string(array);
|
||||||
|
return assert_minified(s, "[1,2,0.11111111111111113]");
|
||||||
|
}
|
||||||
|
|
||||||
|
bool print_to_string_object_exception() {
|
||||||
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
dom::parser parser;
|
||||||
|
dom::object object = parser.parse(DOCUMENT)["baz"];
|
||||||
|
ostringstream s;
|
||||||
|
s << to_string(object);
|
||||||
|
return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
|
||||||
|
}
|
||||||
|
#endif // SIMDJSON_EXCEPTIONS
|
||||||
|
|
||||||
|
bool run() {
|
||||||
|
return print_to_string_parser_parse() &&
|
||||||
|
print_to_string_element() &&
|
||||||
|
print_to_string_array() &&
|
||||||
|
print_to_string_object() &&
|
||||||
|
#if SIMDJSON_EXCEPTIONS
|
||||||
|
print_to_string_parser_parse_exception() &&
|
||||||
|
print_to_string_element_result_exception() &&
|
||||||
|
print_to_string_array_result_exception() &&
|
||||||
|
print_to_string_object_result_exception() &&
|
||||||
|
print_to_string_element_exception() &&
|
||||||
|
print_to_string_array_exception() &&
|
||||||
|
print_to_string_object_exception() &&
|
||||||
|
#endif
|
||||||
|
true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
std::cout << std::unitbuf;
|
std::cout << std::unitbuf;
|
||||||
int c;
|
int c;
|
||||||
|
@ -1646,7 +1789,8 @@ int main(int argc, char *argv[]) {
|
||||||
std::cout << "------------------------------------------------------------" << std::endl;
|
std::cout << "------------------------------------------------------------" << std::endl;
|
||||||
|
|
||||||
std::cout << "Running basic tests." << std::endl;
|
std::cout << "Running basic tests." << std::endl;
|
||||||
if (validate_tests::run() &&
|
if (to_string_tests::run() &&
|
||||||
|
validate_tests::run() &&
|
||||||
minify_tests::run() &&
|
minify_tests::run() &&
|
||||||
parse_api_tests::run() &&
|
parse_api_tests::run() &&
|
||||||
dom_api_tests::run() &&
|
dom_api_tests::run() &&
|
||||||
|
|
|
@ -0,0 +1,80 @@
|
||||||
|
#include <cinttypes>
|
||||||
|
#include <ciso646>
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include <iostream>
|
||||||
|
#include <set>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "cast_tester.h"
|
||||||
|
#include "simdjson.h"
|
||||||
|
#include "test_macros.h"
|
||||||
|
|
||||||
|
// Paths of the JSON documents used as test inputs.
const char *test_files[] = {
    TWITTER_JSON,
    TWITTER_TIMELINE_JSON,
    REPEAT_JSON,
    CANADA_JSON,
    MESH_JSON,
    APACHE_JSON,
    GSOC_JSON,
};
|
||||||
|
/**
|
||||||
|
* The general idea of these tests is that if you take a JSON file,
|
||||||
|
* load it, then convert it into a string, then parse that, and
|
||||||
|
* convert it again into a second string, then the two strings should
|
||||||
|
* be identical. If not, then something was lost or added in the
|
||||||
|
* process.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Loads `filename`, serializes the document with simdjson::to_string, parses
 * that text again and serializes the result a second time.
 *
 * @param filename path of the JSON file to load
 * @return true when both serializations are equal; false when loading or
 *         re-parsing fails or when the two strings differ. Every failure
 *         writes a diagnostic to std::cerr.
 */
bool load_to_string(const char *filename) {
  std::cout << "Loading " << filename << std::endl;
  simdjson::dom::parser parser;
  simdjson::dom::element doc;
  auto error = parser.load(filename).get(doc);
  if (error) { std::cerr << error << std::endl; return false; }
  auto serial1 = simdjson::to_string(doc);
  // serial1 is an independent copy, so reusing the parser and doc is fine.
  error = parser.parse(serial1).get(doc);
  if (error) { std::cerr << error << std::endl; return false; }
  auto serial2 = simdjson::to_string(doc);
  if (serial1 != serial2) {
    // Without this message a mismatch would fail the test silently.
    std::cerr << "to_string round trip mismatch for " << filename
              << " (first pass: " << serial1.size()
              << " bytes, second pass: " << serial2.size() << " bytes)"
              << std::endl;
    return false;
  }
  std::cout << "Parsing to_string and calling to_string again results in the "
               "same content."
            << std::endl;
  return true;
}
|
||||||
|
|
||||||
|
/**
 * Loads `filename`, serializes the document with simdjson::minify, parses
 * that text again and minifies the result a second time.
 *
 * @param filename path of the JSON file to load
 * @return true when both minified strings are equal; false when loading or
 *         re-parsing fails or when the two strings differ. Every failure
 *         writes a diagnostic to std::cerr.
 */
bool load_minify(const char *filename) {
  std::cout << "Loading " << filename << std::endl;
  simdjson::dom::parser parser;
  simdjson::dom::element doc;
  auto error = parser.load(filename).get(doc);
  if (error) { std::cerr << error << std::endl; return false; }
  auto serial1 = simdjson::minify(doc);
  // serial1 is an independent copy, so reusing the parser and doc is fine.
  error = parser.parse(serial1).get(doc);
  if (error) { std::cerr << error << std::endl; return false; }
  auto serial2 = simdjson::minify(doc);
  if (serial1 != serial2) {
    // Without this message a mismatch would fail the test silently.
    std::cerr << "minify round trip mismatch for " << filename
              << " (first pass: " << serial1.size()
              << " bytes, second pass: " << serial2.size() << " bytes)"
              << std::endl;
    return false;
  }
  std::cout << "Parsing minify and calling minify again results in the same "
               "content."
            << std::endl;
  return true;
}
|
||||||
|
|
||||||
|
bool minify_test() {
|
||||||
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < sizeof(test_files) / sizeof(test_files[0]); i++) {
|
||||||
|
bool ok = load_to_string(test_files[i]) && load_minify(test_files[i]);
|
||||||
|
if (!ok) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The process exit status reflects the outcome of minify_test().
int main() {
  if (minify_test()) { return EXIT_SUCCESS; }
  return EXIT_FAILURE;
}
|
|
@ -7,6 +7,11 @@
|
||||||
const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
|
const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
|
||||||
const char *TWITTER_TIMELINE_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter_timeline.json";
|
const char *TWITTER_TIMELINE_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter_timeline.json";
|
||||||
const char *REPEAT_JSON = SIMDJSON_BENCHMARK_DATA_DIR "repeat.json";
|
const char *REPEAT_JSON = SIMDJSON_BENCHMARK_DATA_DIR "repeat.json";
|
||||||
|
const char *CANADA_JSON = SIMDJSON_BENCHMARK_DATA_DIR "canada.json";
|
||||||
|
const char *MESH_JSON = SIMDJSON_BENCHMARK_DATA_DIR "mesh.json";
|
||||||
|
const char *APACHE_JSON = SIMDJSON_BENCHMARK_DATA_DIR "apache_builds.json";
|
||||||
|
const char *GSOC_JSON = SIMDJSON_BENCHMARK_DATA_DIR "gsoc-2018.json";
|
||||||
|
|
||||||
const char *AMAZON_CELLPHONES_NDJSON = SIMDJSON_BENCHMARK_DATA_DIR "amazon_cellphones.ndjson";
|
const char *AMAZON_CELLPHONES_NDJSON = SIMDJSON_BENCHMARK_DATA_DIR "amazon_cellphones.ndjson";
|
||||||
|
|
||||||
#define SIMDJSON_BENCHMARK_SMALLDATA_DIR SIMDJSON_BENCHMARK_DATA_DIR "small/"
|
#define SIMDJSON_BENCHMARK_SMALLDATA_DIR SIMDJSON_BENCHMARK_DATA_DIR "small/"
|
||||||
|
|
Loading…
Reference in New Issue