Fixing "undefined behavior" issue in new fast_itoa functions (#1186)
* Fixing "undefined behavior" issue. * Simplifying our custom atoi * Fixing minor bug
This commit is contained in:
parent
048fb6278a
commit
da093c1982
|
@ -43,29 +43,36 @@ struct escape_sequence {
|
||||||
*/
|
*/
|
||||||
char *fast_itoa(char *output, int64_t value) noexcept {
|
char *fast_itoa(char *output, int64_t value) noexcept {
|
||||||
// This is a standard implementation of itoa.
|
// This is a standard implementation of itoa.
|
||||||
// We first write in reverse order and then reverse.
|
char buffer[20];
|
||||||
|
uint64_t value_positive;
|
||||||
|
// In general, negating a signed integer is unsafe.
|
||||||
if(value < 0) {
|
if(value < 0) {
|
||||||
*output++ = '-';
|
*output++ = '-';
|
||||||
value = -value;
|
// Doing value_positive = -value; while avoiding
|
||||||
|
// undefined behavior warnings.
|
||||||
|
// It assumes two complement's which is universal at this
|
||||||
|
// point in time.
|
||||||
|
std::memcpy(&value_positive, &value, sizeof(value));
|
||||||
|
value_positive = (~value_positive) + 1; // this is a negation
|
||||||
|
} else {
|
||||||
|
value_positive = value;
|
||||||
}
|
}
|
||||||
char *write_pointer = output;
|
// We work solely with value_positive. It *might* be easier
|
||||||
do {
|
// for an optimizing compiler to deal with an unsigned variable
|
||||||
*write_pointer++ = char('0' + (value % 10));
|
// as far as performance goes.
|
||||||
value /= 10;
|
const char *const end_buffer = buffer + 20;
|
||||||
} while (value != 0);
|
char *write_pointer = buffer + 19;
|
||||||
// then we reverse the result
|
// A faster approach is possible if we expect large integers:
|
||||||
char *const answer = write_pointer;
|
// unroll the loop (work in 100s, 1000s) and use some kind of
|
||||||
char *second_write_pointer = output;
|
// memoization.
|
||||||
write_pointer -= 1;
|
while(value_positive >= 10) {
|
||||||
while (second_write_pointer < write_pointer) {
|
*write_pointer-- = char('0' + (value_positive % 10));
|
||||||
char c1 = *write_pointer;
|
value_positive /= 10;
|
||||||
char c2 = *second_write_pointer;
|
|
||||||
*second_write_pointer = c1;
|
|
||||||
*write_pointer = c2;
|
|
||||||
write_pointer--;
|
|
||||||
second_write_pointer++;
|
|
||||||
}
|
}
|
||||||
return answer;
|
*write_pointer = char('0' + value_positive);
|
||||||
|
size_t len = end_buffer - write_pointer;
|
||||||
|
std::memcpy(output, write_pointer, len);
|
||||||
|
return output + len;
|
||||||
}
|
}
|
||||||
/**@private
|
/**@private
|
||||||
* This converts an unsigned integer into a character sequence.
|
* This converts an unsigned integer into a character sequence.
|
||||||
|
@ -78,25 +85,20 @@ char *fast_itoa(char *output, int64_t value) noexcept {
|
||||||
*/
|
*/
|
||||||
char *fast_itoa(char *output, uint64_t value) noexcept {
|
char *fast_itoa(char *output, uint64_t value) noexcept {
|
||||||
// This is a standard implementation of itoa.
|
// This is a standard implementation of itoa.
|
||||||
// We first write in reverse order and then reverse.
|
char buffer[20];
|
||||||
char *write_pointer = output;
|
const char *const end_buffer = buffer + 20;
|
||||||
do {
|
char *write_pointer = buffer + 19;
|
||||||
*write_pointer++ = char('0' + (value % 10));
|
// A faster approach is possible if we expect large integers:
|
||||||
|
// unroll the loop (work in 100s, 1000s) and use some kind of
|
||||||
|
// memoization.
|
||||||
|
while(value >= 10) {
|
||||||
|
*write_pointer-- = char('0' + (value % 10));
|
||||||
value /= 10;
|
value /= 10;
|
||||||
} while (value != 0);
|
};
|
||||||
// then we reverse the result
|
*write_pointer = char('0' + value);
|
||||||
char *const answer = write_pointer;
|
size_t len = end_buffer - write_pointer;
|
||||||
char *second_write_pointer = output;
|
std::memcpy(output, write_pointer, len);
|
||||||
write_pointer -= 1;
|
return output + len;
|
||||||
while (second_write_pointer < write_pointer) {
|
|
||||||
char c1 = *write_pointer;
|
|
||||||
char c2 = *second_write_pointer;
|
|
||||||
*second_write_pointer = c1;
|
|
||||||
*write_pointer = c2;
|
|
||||||
write_pointer--;
|
|
||||||
second_write_pointer++;
|
|
||||||
}
|
|
||||||
return answer;
|
|
||||||
}
|
}
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
|
@ -1677,6 +1677,19 @@ namespace to_string_tests {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool print_to_string_large_int() {
|
||||||
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
dom::parser parser;
|
||||||
|
dom::element doc;
|
||||||
|
ASSERT_SUCCESS( parser.parse("-922337203685477580"_padded).get(doc) );
|
||||||
|
ostringstream s;
|
||||||
|
s << to_string(doc);
|
||||||
|
if(s.str() != "-922337203685477580") {
|
||||||
|
cerr << "failed to parse -922337203685477580" << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool print_to_string_parser_parse() {
|
bool print_to_string_parser_parse() {
|
||||||
std::cout << "Running " << __func__ << std::endl;
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
@ -1785,10 +1798,11 @@ namespace to_string_tests {
|
||||||
#endif // SIMDJSON_EXCEPTIONS
|
#endif // SIMDJSON_EXCEPTIONS
|
||||||
|
|
||||||
bool run() {
|
bool run() {
|
||||||
return print_to_string_parser_parse() &&
|
return print_to_string_large_int() &&
|
||||||
|
print_to_string_parser_parse() &&
|
||||||
print_to_string_element() &&
|
print_to_string_element() &&
|
||||||
print_to_string_array() &&
|
print_to_string_array() &&
|
||||||
print_to_string_object() &&
|
print_to_string_object() &&
|
||||||
#if SIMDJSON_EXCEPTIONS
|
#if SIMDJSON_EXCEPTIONS
|
||||||
print_to_string_parser_parse_exception() &&
|
print_to_string_parser_parse_exception() &&
|
||||||
print_to_string_element_result_exception() &&
|
print_to_string_element_result_exception() &&
|
||||||
|
|
Loading…
Reference in New Issue