Fixing "undefined behavior" issue in new fast_itoa functions (#1186)

* Fixing "undefined behavior" issue.

* Simplifying our custom atoi

* Fixing minor bug
This commit is contained in:
Daniel Lemire 2020-09-29 19:17:03 -04:00 committed by GitHub
parent 048fb6278a
commit da093c1982
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 55 additions and 39 deletions

View File

@ -43,29 +43,36 @@ struct escape_sequence {
*/ */
char *fast_itoa(char *output, int64_t value) noexcept { char *fast_itoa(char *output, int64_t value) noexcept {
// This is a standard implementation of itoa. // This is a standard implementation of itoa.
// We first write in reverse order and then reverse. char buffer[20];
uint64_t value_positive;
// In general, negating a signed integer is unsafe.
if(value < 0) { if(value < 0) {
*output++ = '-'; *output++ = '-';
value = -value; // Doing value_positive = -value; while avoiding
// undefined behavior warnings.
// It assumes two complement's which is universal at this
// point in time.
std::memcpy(&value_positive, &value, sizeof(value));
value_positive = (~value_positive) + 1; // this is a negation
} else {
value_positive = value;
} }
char *write_pointer = output; // We work solely with value_positive. It *might* be easier
do { // for an optimizing compiler to deal with an unsigned variable
*write_pointer++ = char('0' + (value % 10)); // as far as performance goes.
value /= 10; const char *const end_buffer = buffer + 20;
} while (value != 0); char *write_pointer = buffer + 19;
// then we reverse the result // A faster approach is possible if we expect large integers:
char *const answer = write_pointer; // unroll the loop (work in 100s, 1000s) and use some kind of
char *second_write_pointer = output; // memoization.
write_pointer -= 1; while(value_positive >= 10) {
while (second_write_pointer < write_pointer) { *write_pointer-- = char('0' + (value_positive % 10));
char c1 = *write_pointer; value_positive /= 10;
char c2 = *second_write_pointer;
*second_write_pointer = c1;
*write_pointer = c2;
write_pointer--;
second_write_pointer++;
} }
return answer; *write_pointer = char('0' + value_positive);
size_t len = end_buffer - write_pointer;
std::memcpy(output, write_pointer, len);
return output + len;
} }
/**@private /**@private
* This converts an unsigned integer into a character sequence. * This converts an unsigned integer into a character sequence.
@ -78,25 +85,20 @@ char *fast_itoa(char *output, int64_t value) noexcept {
*/ */
char *fast_itoa(char *output, uint64_t value) noexcept { char *fast_itoa(char *output, uint64_t value) noexcept {
// This is a standard implementation of itoa. // This is a standard implementation of itoa.
// We first write in reverse order and then reverse. char buffer[20];
char *write_pointer = output; const char *const end_buffer = buffer + 20;
do { char *write_pointer = buffer + 19;
*write_pointer++ = char('0' + (value % 10)); // A faster approach is possible if we expect large integers:
// unroll the loop (work in 100s, 1000s) and use some kind of
// memoization.
while(value >= 10) {
*write_pointer-- = char('0' + (value % 10));
value /= 10; value /= 10;
} while (value != 0); };
// then we reverse the result *write_pointer = char('0' + value);
char *const answer = write_pointer; size_t len = end_buffer - write_pointer;
char *second_write_pointer = output; std::memcpy(output, write_pointer, len);
write_pointer -= 1; return output + len;
while (second_write_pointer < write_pointer) {
char c1 = *write_pointer;
char c2 = *second_write_pointer;
*second_write_pointer = c1;
*write_pointer = c2;
write_pointer--;
second_write_pointer++;
}
return answer;
} }
} // anonymous namespace } // anonymous namespace
namespace internal { namespace internal {

View File

@ -1677,6 +1677,19 @@ namespace to_string_tests {
return true; return true;
} }
bool print_to_string_large_int() {
std::cout << "Running " << __func__ << std::endl;
dom::parser parser;
dom::element doc;
ASSERT_SUCCESS( parser.parse("-922337203685477580"_padded).get(doc) );
ostringstream s;
s << to_string(doc);
if(s.str() != "-922337203685477580") {
cerr << "failed to parse -922337203685477580" << endl;
return false;
}
return true;
}
bool print_to_string_parser_parse() { bool print_to_string_parser_parse() {
std::cout << "Running " << __func__ << std::endl; std::cout << "Running " << __func__ << std::endl;
@ -1785,7 +1798,8 @@ namespace to_string_tests {
#endif // SIMDJSON_EXCEPTIONS #endif // SIMDJSON_EXCEPTIONS
bool run() { bool run() {
return print_to_string_parser_parse() && return print_to_string_large_int() &&
print_to_string_parser_parse() &&
print_to_string_element() && print_to_string_element() &&
print_to_string_array() && print_to_string_array() &&
print_to_string_object() && print_to_string_object() &&