From c11eefca32d743cdcc5f079234a77e277a697606 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 30 Nov 2018 21:31:05 -0500 Subject: [PATCH] More cleaning. --- README.md | 4 +- benchmark/minifiercompetition.cpp | 22 ++-- benchmark/parse.cpp | 6 +- benchmark/parsingcompetition.cpp | 22 ++-- include/simdjson/jsonminifier.h | 4 +- src/jsonioutil.cpp | 5 +- src/jsonparser.cpp | 4 + src/stage1_find_marks.cpp | 169 +++++++++++++++++++++++++++--- src/stage2_flatten.cpp | 2 +- tests/allparserscheckfile.cpp | 4 +- tests/jsoncheck.cpp | 9 +- tools/minify.cpp | 4 +- 12 files changed, 199 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index df91d3fc..b1e0b246 100644 --- a/README.md +++ b/README.md @@ -12,11 +12,11 @@ Goal: Speed up the parsing of JSON per se. /... const char * filename = ... // -simdjsonstring p = get_corpus(filename); +std::string_view p = get_corpus(filename); ParsedJson pj; size_t maxdepth = 1024; // support documents have nesting "depth" up to 1024 pj.allocateCapacity(p.size(), maxdepth); // allocate memory for parsing up to p.size() bytes -bool is_ok = json_parse(p.first, p.second, pj); // do the parsing, return false on error +bool is_ok = json_parse(p, pj); // do the parsing, return false on error // parsing is done! // js can be reused with other json_parse calls. ``` diff --git a/benchmark/minifiercompetition.cpp b/benchmark/minifiercompetition.cpp index 783d5ded..e53b14fd 100644 --- a/benchmark/minifiercompetition.cpp +++ b/benchmark/minifiercompetition.cpp @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) { exit(1); } const char * filename = argv[optind]; - simdjsonstring p; + std::string_view p; try { p = get_corpus(filename); } catch (const std::exception& e) { // caught by reference to base @@ -79,20 +79,20 @@ int main(int argc, char *argv[]) { std::cout << std::endl; } char *buffer = allocate_aligned_buffer(p.size() + 1); - memcpy(buffer, p.c_str(), p.size()); + memcpy(buffer, p.data(), p.size()); buffer[p.size()] = '\0'; int repeat = 10; int volume = p.size(); - size_t strlength = rapidstringme((char *)p.c_str()).size(); + size_t strlength = rapidstringme((char *)p.data()).size(); if (verbose) std::cout << "input length is " << p.size() << " stringified length is " << strlength << std::endl; - BEST_TIME_NOCHECK("despacing with RapidJSON", rapidstringme((char *)p.c_str()), , repeat, volume, true); + BEST_TIME_NOCHECK("despacing with RapidJSON", rapidstringme((char *)p.data()), , repeat, volume, true); BEST_TIME_NOCHECK("despacing with RapidJSON Insitu", rapidstringmeInsitu((char *)buffer), - memcpy(buffer, p.c_str(), p.size()), repeat, volume, true); - memcpy(buffer, p.c_str(), p.size()); + memcpy(buffer, p.data(), p.size()), repeat, volume, true); + memcpy(buffer, p.data(), p.size()); size_t outlength = jsonminify((const uint8_t *)buffer, p.size(), (uint8_t *)buffer); @@ -101,7 +101,7 @@ int main(int argc, char *argv[]) { uint8_t *cbuffer = (uint8_t *)buffer; BEST_TIME("jsonminify", jsonminify(cbuffer, p.size(), cbuffer), outlength, - memcpy(buffer, p.c_str(), p.size()), repeat, volume, true); + memcpy(buffer, p.data(), p.size()), repeat, volume, true); printf("minisize = %zu, original size = %zu (minified down to %.2f percent of original) \n", outlength, p.size(), outlength * 100.0 / p.size()); /*** @@ -109,10 +109,10 @@ int main(int argc, char *argv[]) { ***/ rapidjson::Document d; BEST_TIME("RapidJSON Insitu orig", d.ParseInsitu(buffer).HasParseError(), false, - memcpy(buffer, p.c_str(), p.size()), repeat, volume, true); + memcpy(buffer, p.data(), p.size()), repeat, volume, true); char *minibuffer = allocate_aligned_buffer(p.size() + 1); - size_t minisize = jsonminify((const uint8_t *)p.c_str(), p.size(), (uint8_t*) minibuffer); + size_t minisize = jsonminify((const uint8_t *)p.data(), p.size(), (uint8_t*) minibuffer); minibuffer[minisize] = '\0'; BEST_TIME("RapidJSON Insitu despaced", d.ParseInsitu(buffer).HasParseError(), false, @@ -122,14 +122,14 @@ int main(int argc, char *argv[]) { size_t astbuffersize = p.size() * 2; size_t * ast_buffer = (size_t *) malloc(astbuffersize * sizeof(size_t)); - BEST_TIME("sajson orig", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true); + BEST_TIME("sajson orig", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true); BEST_TIME("sajson despaced", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(minisize, buffer)).is_valid(), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, true); ParsedJson pj; pj.allocateCapacity(p.size(), 1024); - BEST_TIME("json_parse orig", json_parse((const u8*)buffer, p.size(), pj), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true); + BEST_TIME("json_parse orig", json_parse((const u8*)buffer, p.size(), pj), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true); ParsedJson pj2; pj2.allocateCapacity(p.size(), 1024); diff --git a/benchmark/parse.cpp b/benchmark/parse.cpp index 89dcc5dd..b8b5480d 100644 --- a/benchmark/parse.cpp +++ b/benchmark/parse.cpp @@ -65,7 +65,7 @@ int main(int argc, char *argv[]) { cerr << "warning: ignoring everything after " << argv[optind + 1] << endl; } if(verbose) cout << "[verbose] loading " << filename << endl; - simdjsonstring p; + std::string_view p; try { p = get_corpus(filename); } catch (const std::exception& e) { // caught by reference to base @@ -118,7 +118,7 @@ int main(int argc, char *argv[]) { #ifndef SQUASH_COUNTERS unified.start(); #endif - isok = find_structural_bits(p.c_str(), p.size(), pj); + isok = find_structural_bits(p.data(), p.size(), pj); #ifndef SQUASH_COUNTERS unified.end(results); cy1 += results[0]; @@ -147,7 +147,7 @@ int main(int argc, char *argv[]) { unified.start(); #endif - isok = isok && unified_machine(p.c_str(), p.size(), pj); + isok = isok && unified_machine(p.data(), p.size(), pj); #ifndef SQUASH_COUNTERS unified.end(results); cy3 += results[0]; diff --git a/benchmark/parsingcompetition.cpp b/benchmark/parsingcompetition.cpp index 4a29c3e9..5083d373 100644 --- a/benchmark/parsingcompetition.cpp +++ b/benchmark/parsingcompetition.cpp @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) { if(optind + 1 < argc) { cerr << "warning: ignoring everything after " << argv[optind + 1] << endl; } - simdjsonstring p; + std::string_view p; try { p = get_corpus(filename); } catch (const std::exception& e) { // caught by reference to base @@ -93,32 +93,32 @@ int main(int argc, char *argv[]) { rapidjson::Document d; char *buffer = (char *)malloc(p.size() + 1); - memcpy(buffer, p.c_str(), p.size()); + memcpy(buffer, p.data(), p.size()); buffer[p.size()] = '\0'; BEST_TIME("RapidJSON", d.Parse((const char *)buffer).HasParseError(), - false, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true); + false, memcpy(buffer, p.data(), p.size()), repeat, volume, true); BEST_TIME("RapidJSON Insitu", d.ParseInsitu(buffer).HasParseError(), false, - memcpy(buffer, p.c_str(), p.size()), repeat, volume, true); + memcpy(buffer, p.data(), p.size()), repeat, volume, true); - BEST_TIME("sajson (dynamic mem)", sajson::parse(sajson::dynamic_allocation(), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true); + BEST_TIME("sajson (dynamic mem)", sajson::parse(sajson::dynamic_allocation(), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true); size_t astbuffersize = p.size(); size_t * ast_buffer = (size_t *) malloc(astbuffersize * sizeof(size_t)); - BEST_TIME("sajson (static alloc)", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true); + BEST_TIME("sajson (static alloc)", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true); std::string json11err; - if(all) BEST_TIME("dropbox (json11) ", (( json11::Json::parse(buffer,json11err).is_null() ) || ( ! json11err.empty() )), false, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true); + if(all) BEST_TIME("dropbox (json11) ", (( json11::Json::parse(buffer,json11err).is_null() ) || ( ! json11err.empty() )), false, memcpy(buffer, p.data(), p.size()), repeat, volume, true); - if(all) BEST_TIME("fastjson ", fastjson_parse(buffer), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true); + if(all) BEST_TIME("fastjson ", fastjson_parse(buffer), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true); JsonValue value; JsonAllocator allocator; char *endptr; - if(all) BEST_TIME("gason ", jsonParse(buffer, &endptr, &value, allocator), JSON_OK, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true); + if(all) BEST_TIME("gason ", jsonParse(buffer, &endptr, &value, allocator), JSON_OK, memcpy(buffer, p.data(), p.size()), repeat, volume, true); void *state; - if(all) BEST_TIME("ultrajson ", (UJDecode(buffer, p.size(), NULL, &state) == NULL), false, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true); - BEST_TIME("memcpy ", (memcpy(buffer, p.c_str(), p.size()) == buffer), true, , repeat, volume, true); + if(all) BEST_TIME("ultrajson ", (UJDecode(buffer, p.size(), NULL, &state) == NULL), false, memcpy(buffer, p.data(), p.size()), repeat, volume, true); + BEST_TIME("memcpy ", (memcpy(buffer, p.data(), p.size()) == buffer), true, , repeat, volume, true); free(ast_buffer); free(buffer); } diff --git a/include/simdjson/jsonminifier.h b/include/simdjson/jsonminifier.h index 3187d5a5..64389f87 100644 --- a/include/simdjson/jsonminifier.h +++ b/include/simdjson/jsonminifier.h @@ -14,6 +14,6 @@ static inline size_t jsonminify(const char *buf, size_t len, char *out) { } -static inline size_t jsonminify(const simdjsonstring & p, char *out) { - return jsonminify(p.c_str(), p.size(), out); +static inline size_t jsonminify(const std::string_view & p, char *out) { + return jsonminify(p.data(), p.size(), out); } \ No newline at end of file diff --git a/src/jsonioutil.cpp b/src/jsonioutil.cpp index b294e6ec..ad147cc4 100644 --- a/src/jsonioutil.cpp +++ b/src/jsonioutil.cpp @@ -10,10 +10,6 @@ char * allocate_aligned_buffer(size_t length) { if (posix_memalign((void **)&aligned_buffer, 64, totalpaddedlength)) { throw std::runtime_error("Could not allocate sufficient memory"); }; - aligned_buffer[length] = '\0'; - for(size_t i = length + 1; i < totalpaddedlength; i++) aligned_buffer[i] = 0x20; - //aligned_buffer[paddedlength] = '\0'; - //memset(aligned_buffer + length, 0x20, paddedlength - length); return aligned_buffer; } @@ -29,6 +25,7 @@ std::string_view get_corpus(std::string filename) { } std::rewind(fp); std::fread(buf, 1, len, fp); + buf[len] = '\0'; std::fclose(fp); return std::string_view(buf,len); } diff --git a/src/jsonparser.cpp b/src/jsonparser.cpp index b995220e..4e31e70e 100644 --- a/src/jsonparser.cpp +++ b/src/jsonparser.cpp @@ -11,9 +11,13 @@ bool json_parse(const u8 *buf, size_t len, ParsedJson &pj) { bool isok = find_structural_bits(buf, len, pj); if (isok) { isok = flatten_indexes(len, pj); + } else { + return false; } if (isok) { isok = unified_machine(buf, len, pj); + } else { + return false; } return isok; } diff --git a/src/stage1_find_marks.cpp b/src/stage1_find_marks.cpp index ad71af4c..50ac039b 100644 --- a/src/stage1_find_marks.cpp +++ b/src/stage1_find_marks.cpp @@ -61,8 +61,9 @@ WARN_UNUSED // effectively the very first char is considered to follow "whitespace" for the // purposes of psuedo-structural character detection u64 prev_iter_ends_pseudo_pred = 1ULL; - - for (size_t idx = 0; idx < len; idx += 64) { + size_t lenminus64 = len + 1 < 64 ? 0 : len + 1 - 64; // len + 1 because of the NULL termination + size_t idx = 0; + for (; idx < lenminus64; idx += 64) { __builtin_prefetch(buf + idx + 128); #ifdef DEBUG cout << "Idx is " << idx << "\n"; @@ -249,21 +250,163 @@ WARN_UNUSED "final structurals and pseudo structurals after close quote removal"); *(u64 *)(pj.structurals + idx / 8) = structurals; } + + //////////////// + /// we use a giant copy-paste which is ugly. + /// but otherwise the string needs to be properly padded or else we + /// risk invalidating the UTF-8 checks. + //////////// + if (idx < len + 1) { // +1 due to NULL termination + u8 tmpbuf[64]; + memset(tmpbuf,0x20,64); + memcpy(tmpbuf,buf+idx,len - idx + 1);// +1 due to NULL termination + m256 input_lo = _mm256_loadu_si256((const m256 *)(tmpbuf + 0)); + m256 input_hi = _mm256_loadu_si256((const m256 *)(tmpbuf + 32)); +#ifdef UTF8VALIDATE + m256 highbit = _mm256_set1_epi8(0x80); + if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) { + // it is ascii, we just check continuation + has_error = _mm256_or_si256( + _mm256_cmpgt_epi8(previous.carried_continuations, + _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 1)),has_error); + + } else { + // it is not ascii so we have to do heavy work + previous = avxcheckUTF8Bytes(input_lo, &previous, &has_error); + previous = avxcheckUTF8Bytes(input_hi, &previous, &has_error); + } +#endif + //////////////////////////////////////////////////////////////////////////////////////////// + // Step 1: detect odd sequences of backslashes + //////////////////////////////////////////////////////////////////////////////////////////// + + u64 bs_bits = + cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('\\')); + u64 start_edges = bs_bits & ~(bs_bits << 1); + // flip lowest if we have an odd-length run at the end of the prior + // iteration + u64 even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; + u64 even_starts = start_edges & even_start_mask; + u64 odd_starts = start_edges & ~even_start_mask; + u64 even_carries = bs_bits + even_starts; + + u64 odd_carries; + // must record the carry-out of our odd-carries out of bit 63; this + // indicates whether the sense of any edge going to the next iteration + // should be flipped + bool iter_ends_odd_backslash = + __builtin_uaddll_overflow(bs_bits, odd_starts, &odd_carries); + + odd_carries |= + prev_iter_ends_odd_backslash; // push in bit zero as a potential end + // if we had an odd-numbered run at the + // end of the previous iteration + prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; + u64 even_carry_ends = even_carries & ~bs_bits; + u64 odd_carry_ends = odd_carries & ~bs_bits; + u64 even_start_odd_end = even_carry_ends & odd_bits; + u64 odd_start_even_end = odd_carry_ends & even_bits; + u64 odd_ends = even_start_odd_end | odd_start_even_end; + + //////////////////////////////////////////////////////////////////////////////////////////// + // Step 2: detect insides of quote pairs + //////////////////////////////////////////////////////////////////////////////////////////// + + u64 quote_bits = + cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"')); + quote_bits = quote_bits & ~odd_ends; + u64 quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( + _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0)); + quote_mask ^= prev_iter_inside_quote; + prev_iter_inside_quote = (u64)((s64)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20 + + // How do we build up a user traversable data structure + // first, do a 'shufti' to detect structural JSON characters + // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c + // these go into the first 3 buckets of the comparison (1/2/4) + + // we are also interested in the four whitespace characters + // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d + // these go into the next 2 buckets of the comparison (8/16) + const m256 low_nibble_mask = _mm256_setr_epi8( + // 0 9 a b c d + 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0, + 0, 0, 8, 12, 1, 2, 9, 0, 0); + const m256 high_nibble_mask = _mm256_setr_epi8( + // 0 2 3 5 7 + 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0, + 1, 0, 0, 0, 3, 2, 1, 0, 0); + + m256 structural_shufti_mask = _mm256_set1_epi8(0x7); + m256 whitespace_shufti_mask = _mm256_set1_epi8(0x18); + + m256 v_lo = _mm256_and_si256( + _mm256_shuffle_epi8(low_nibble_mask, input_lo), + _mm256_shuffle_epi8(high_nibble_mask, + _mm256_and_si256(_mm256_srli_epi32(input_lo, 4), + _mm256_set1_epi8(0x7f)))); + + m256 v_hi = _mm256_and_si256( + _mm256_shuffle_epi8(low_nibble_mask, input_hi), + _mm256_shuffle_epi8(high_nibble_mask, + _mm256_and_si256(_mm256_srli_epi32(input_hi, 4), + _mm256_set1_epi8(0x7f)))); + m256 tmp_lo = _mm256_cmpeq_epi8( + _mm256_and_si256(v_lo, structural_shufti_mask), _mm256_set1_epi8(0)); + m256 tmp_hi = _mm256_cmpeq_epi8( + _mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0)); + + u64 structural_res_0 = (u32)_mm256_movemask_epi8(tmp_lo); + u64 structural_res_1 = _mm256_movemask_epi8(tmp_hi); + u64 structurals = ~(structural_res_0 | (structural_res_1 << 32)); + + // this additional mask and transfer is non-trivially expensive, + // unfortunately + m256 tmp_ws_lo = _mm256_cmpeq_epi8( + _mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0)); + m256 tmp_ws_hi = _mm256_cmpeq_epi8( + _mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0)); + + u64 ws_res_0 = (u32)_mm256_movemask_epi8(tmp_ws_lo); + u64 ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi); + u64 whitespace = ~(ws_res_0 | (ws_res_1 << 32)); + + + // mask off anything inside quotes + structurals &= ~quote_mask; + + // add the real quote bits back into our bitmask as well, so we can + // quickly traverse the strings we've spent all this trouble gathering + structurals |= quote_bits; + + // Now, establish "pseudo-structural characters". These are non-whitespace + // characters that are (a) outside quotes and (b) have a predecessor that's + // either whitespace or a structural character. This means that subsequent + // passes will get a chance to encounter the first character of every string + // of non-whitespace and, if we're parsing an atom like true/false/null or a + // number we can stop at the first whitespace or structural character + // following it. + + // a qualified predecessor is something that can happen 1 position before an + // psuedo-structural character + u64 pseudo_pred = structurals | whitespace; + u64 shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred; + prev_iter_ends_pseudo_pred = pseudo_pred >> 63; + u64 pseudo_structurals = + shifted_pseudo_pred & (~whitespace) & (~quote_mask); + structurals |= pseudo_structurals; + + // now, we've used our close quotes all we need to. So let's switch them off + // they will be off in the quote mask and on in quote bits. + structurals &= ~(quote_bits & ~quote_mask); + *(u64 *)(pj.structurals + idx / 8) = structurals; + } if(buf[len] != '\0') { std::cerr << "Your string should be NULL terminated." << std::endl; return false; } - // we are going to zero out everything after len: - size_t count_last_64bits = len % 64; - if(count_last_64bits != 0) { // we have a "final" word where only count_last_64bits matter - u64 lastword = *(u64 *)(pj.structurals + len / 8); - printf("last word %zu \n", lastword); - printf("count_last_64bits%zu \n", count_last_64bits); - lastword &= ( UINT64_C(1) << count_last_64bits) - 1; - *(u64 *)(pj.structurals + len / 8) = lastword; - } - - //pj.structural_indexes[pj.n_structural_indexes++] = len; // the final NULL is used as a pseudo-structural character #ifdef UTF8VALIDATE return _mm256_testz_si256(has_error, has_error); #else diff --git a/src/stage2_flatten.cpp b/src/stage2_flatten.cpp index f4823ebb..11ed70ca 100644 --- a/src/stage2_flatten.cpp +++ b/src/stage2_flatten.cpp @@ -119,7 +119,7 @@ bool flatten_indexes(size_t len, ParsedJson &pj) { } pj.n_structural_indexes = base; if(len != base_ptr[pj.n_structural_indexes-1]) { - printf("last structural should be pointing at the end of the string\n"); + // can happen with malformed JSON such as unclosed quotes (["this is an unclosed string ]) return false; } base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array diff --git a/tests/allparserscheckfile.cpp b/tests/allparserscheckfile.cpp index add61aea..61a94c2b 100644 --- a/tests/allparserscheckfile.cpp +++ b/tests/allparserscheckfile.cpp @@ -58,7 +58,7 @@ int main(int argc, char *argv[]) { exit(1); } const char * filename = argv[optind]; - simdjsonstring p; + std::string_view p; try { p = get_corpus(filename); } catch (const std::exception& e) { // caught by reference to base @@ -86,7 +86,7 @@ int main(int argc, char *argv[]) { rapidjson::Document d; char *buffer = (char *)malloc(p.size() + 1); - memcpy(buffer, p.c_str(), p.size()); + memcpy(buffer, p.data(), p.size()); buffer[p.size()] = '\0'; bool rapid_correct = (d.Parse((const char *)buffer).HasParseError() == false); bool rapid_correct_checkencoding = (d.Parse((const char *)buffer).HasParseError() == false); diff --git a/tests/jsoncheck.cpp b/tests/jsoncheck.cpp index 0d60ff5a..dad01687 100644 --- a/tests/jsoncheck.cpp +++ b/tests/jsoncheck.cpp @@ -49,7 +49,8 @@ bool validate(const char *dirname) { for (int i = 0; i < c; i++) { const char *name = entry_list[i]->d_name; if (hasExtension(name, extension)) { - //printf("validating: file %s \n", name); + printf("validating: file %s ", name); + fflush(NULL); size_t filelen = strlen(name); char *fullpath = (char *)malloc(dirlen + filelen + 1 + 1); strcpy(fullpath, dirname); @@ -74,6 +75,7 @@ bool validate(const char *dirname) { } ++howmany; bool isok = json_parse(p, pj); + printf("%s\n", isok ? "ok" : "invalid"); if(contains("EXCLUDE",name)) { // skipping howmany--; @@ -89,10 +91,7 @@ bool validate(const char *dirname) { printf("warning: file %s should fail but it passes.\n", name); everythingfine = false; } - } else { - printf("File %s %s.\n", name, - isok ? " is valid JSON " : " is not valid JSON"); - } + } free(fullpath); } } diff --git a/tools/minify.cpp b/tools/minify.cpp index c6e1104e..5fd8b9a0 100644 --- a/tools/minify.cpp +++ b/tools/minify.cpp @@ -8,7 +8,7 @@ int main(int argc, char *argv[]) { std::cerr << "Usage: " << argv[0] << " \n"; exit(1); } - simdjsonstring p; + std::string_view p; std::string filename = argv[argc - 1]; try{ p = get_corpus(filename); @@ -16,6 +16,6 @@ int main(int argc, char *argv[]) { std::cout << "Could not load the file " << filename << std::endl; return EXIT_FAILURE; } - jsonminify(p, &p[0]); + jsonminify(p, (char *)p.data()); printf("%s",p.data()); }