From 1aa40b88e703c3a77613346b1562cb9ccc37ae96 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 6 Apr 2018 09:46:29 -0400 Subject: [PATCH 1/9] Minor update --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4e37ce97..ddf9f4d8 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ Of course, stage 4 is totally unimplemented so it might be a priority as well: > Using this parallel bit stream approach, the vast majority of conditional branches used to identify key positions and/or syntax errors at each parsing position are mostly eliminated, which, as Section 6.2 shows, minimizes branch misprediction penalties. Accurate parsing and parallel lexical analysis is done through processor-friendly equations that require neither speculation nor multithreading. - Deshmukh, V. M., and G. R. Bamnote. "An empirical evaluation of optimization parameters in XML parsing for performance enhancement." Computer, Communication and Control (IC4), 2015 International Conference on. IEEE, 2015. -APA +APA - Moussalli, Roger, et al. "Efficient XML Path Filtering Using GPUs." ADMS@ VLDB. 2011. @@ -97,6 +97,10 @@ APA - http://rapidjson.org/md_doc_sax.html - https://github.com/Geal/parser_benchmarks/tree/master/json - Gron: A command line tool that makes JSON greppable https://news.ycombinator.com/item?id=16727665 +- GoogleGson https://github.com/google/gson +- Jackson https://github.com/FasterXML/jackson +- https://www.yelp.com/dataset_challenge +- RapidJSON. http://rapidjson.org/ Inspiring links: - https://auth0.com/blog/beating-json-performance-with-protobuf/ @@ -152,6 +156,6 @@ containing structural element ("up"). - The ``clmul`` thing is tricky but nice. (Geoff's remark: find the spaces between quotes, is actually a ponderous way of doing parallel prefix over XOR, which a mathematically adept person would have realized could be done with clmul by -1. 
Not me, I had to look it up: http://bitmath.blogspot.com.au/2016/11/parallel-prefixsuffix-operations.html.) - It is possible, though maybe unlikely, that parallelizing the bitset decoding could be useful (https://lemire.me/blog/2018/03/08/iterating-over-set-bits-quickly-simd-edition/), and there is VCOMPRESSB (AVX-512) -## Future work +## Future work Long term we should keep in mind the idea that what would be cool is a method to extract something like this code from an abstract description of something closer to a grammar. From 3b32b11fa69e2953b2f51e01a8efc095a9de19ed Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 6 Apr 2018 10:53:51 -0400 Subject: [PATCH 2/9] Colorful display. --- main.cpp | 80 +++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 15 deletions(-) diff --git a/main.cpp b/main.cpp index 4528e81f..630235be 100644 --- a/main.cpp +++ b/main.cpp @@ -12,7 +12,7 @@ #include #include #include "common_defs.h" - + using namespace std; #define DEBUG @@ -31,7 +31,7 @@ inline void dump256(m256 d, string msg) { cout << " " << msg << "\n"; } -// dump bits low to high +// dump bits low to high void dumpbits(u64 v, string msg) { for (u32 i = 0; i < 64; i++) { std::cout << (((v>>(u64)i) & 0x1ULL) ? 
"1" : "_"); @@ -55,7 +55,7 @@ ifstream is(filename, ios::binary); throw "Allocation failed"; }; memset(aligned_buffer, 0x20, ROUNDUP_N(length, 64)); - memcpy(aligned_buffer, buffer.str().c_str(), length); + memcpy(aligned_buffer, buffer.str().c_str(), length); is.close(); return make_pair((u8 *)aligned_buffer, length); } @@ -88,14 +88,14 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask never_inline bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) { // Useful constant masks const u64 even_bits = 0x5555555555555555ULL; - const u64 odd_bits = ~even_bits; + const u64 odd_bits = ~even_bits; // for now, just work in 64-byte chunks // we have padded the input out to 64 byte multiple with the remainder being zeros // persistent state across loop u64 prev_iter_ends_odd_backslash = 0ULL; // either 0 or 1, but a 64-bit value - u64 prev_iter_inside_quote = 0ULL; // either all zeros or all ones + u64 prev_iter_inside_quote = 0ULL; // either all zeros or all ones u64 prev_iter_pseudo_structural_carry = 0ULL; for (size_t idx = 0; idx < len; idx+=64) { @@ -108,7 +108,7 @@ never_inline bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & } else { cout << '_'; } - } + } cout << "| ... 
input\n"; #endif m256 input_lo = _mm256_load_si256((const m256 *)(buf + idx + 0)); @@ -126,11 +126,11 @@ never_inline bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & // flip lowest if we have an odd-length run at the end of the prior iteration u64 even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; u64 even_starts = start_edges & even_start_mask; - u64 odd_starts = start_edges & ~even_start_mask; + u64 odd_starts = start_edges & ~even_start_mask; dumpbits(even_starts, "even_starts"); dumpbits(odd_starts, "odd_starts"); - + u64 even_carries = bs_bits + even_starts; u64 odd_carries; @@ -158,9 +158,9 @@ never_inline bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & u64 odd_ends = even_start_odd_end | odd_start_even_end; dumpbits(odd_ends, "odd_ends"); - + //////////////////////////////////////////////////////////////////////////////////////////// - // Step 2: detect insides of quote pairs + // Step 2: detect insides of quote pairs //////////////////////////////////////////////////////////////////////////////////////////// u64 quote_bits = cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"')); @@ -227,7 +227,7 @@ never_inline bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & // mask off anything inside quotes structurals &= ~quote_mask; - + // whitespace inside our quotes also doesn't count; otherwise " foo" would generate a spurious // pseudo-structural-character at 'foo' whitespace &= ~quote_mask; @@ -245,9 +245,9 @@ never_inline bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & // Slightly more painful than it would seem. It's possible that either structurals or whitespace are // all 1s (e.g. {{{{{{{....{{{{x64, or a really long whitespace). 
As such there is no safe place - // to add a '1' from the previous iteration without *that* triggering the carry we are looking + // to add a '1' from the previous iteration without *that* triggering the carry we are looking // out for, so we must check both carries for overflow - + u64 tmp = structurals | whitespace; u64 tmp2; bool ps_carry = __builtin_uaddll_overflow(tmp, structurals, &tmp2); @@ -260,7 +260,7 @@ never_inline bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & tmp3 &= ~whitespace; dumpbits(tmp3, "pseudo_structural add calculation without quotes and whitespace"); dumpbits(structurals, "final structurals without quotes"); - structurals |= tmp3; + structurals |= tmp3; dumpbits(structurals, "final structurals and pseudo structurals"); *(u64 *)(pj.structurals + idx/8) = structurals; @@ -366,6 +366,55 @@ never_inline bool json_parse(const u8 * buf, UNUSED size_t len, ParsedJson & pj) return true; } +// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal +namespace Color { + enum Code { + FG_DEFAULT = 39, FG_BLACK = 30, FG_RED = 31, FG_GREEN = 32, + FG_YELLOW = 33, FG_BLUE = 34, FG_MAGENTA = 35, FG_CYAN = 36, + FG_LIGHT_GRAY = 37, FG_DARK_GRAY = 90, FG_LIGHT_RED = 91, + FG_LIGHT_GREEN = 92, FG_LIGHT_YELLOW = 93, FG_LIGHT_BLUE = 94, + FG_LIGHT_MAGENTA = 95, FG_LIGHT_CYAN = 96, FG_WHITE = 97, + BG_RED = 41, BG_GREEN = 42, BG_BLUE = 44, BG_DEFAULT = 49 + }; + class Modifier { + Code code; + public: + Modifier(Code pCode) : code(pCode) {} + friend std::ostream& + operator<<(std::ostream& os, const Modifier& mod) { + return os << "\033[" << mod.code << "m"; + } + }; +} + +void colorfuldisplay(ParsedJson & pj, const u8 * buf) { + Color::Modifier greenfg(Color::FG_GREEN); + Color::Modifier yellowfg(Color::FG_YELLOW); + Color::Modifier deffg(Color::FG_DEFAULT); + size_t i = 0; + // skip initial fluff + while((i+1< pj.n_structural_indexes) && (pj.structural_indexes[i]==pj.structural_indexes[i+1])){ + i++; + 
} + for (; i < pj.n_structural_indexes; i++) { + u32 idx = pj.structural_indexes[i]; + u8 c = buf[idx]; + if (((c & 0xdf) == 0x5b)) { // meaning 7b or 5b, { or [ + std::cout << greenfg << buf[idx] << deffg; + } else if (((c & 0xdf) == 0x5d)) { // meaning 7d or 5d, } or ] + std::cout << greenfg << buf[idx] << deffg; + } else { + std::cout << yellowfg << buf[idx] << deffg; + } + if(i + 1 < pj.n_structural_indexes) { + u32 nextidx = pj.structural_indexes[i + 1]; + for(u32 pos = idx + 1 ; pos < nextidx; pos++) { + std::cout << buf[pos]; + } + } + } + std::cout << std::endl; +} int main(int argc, char * argv[]) { if (argc != 2) { cerr << "Usage: " << argv[0] << " \n"; @@ -382,7 +431,7 @@ int main(int argc, char * argv[]) { // we have potentially 1 structure per byte of input // as well as a dummy structure and a root structure // we also potentially write up to 7 iterations beyond - // in our 'cheesy flatten', so make some worst-case + // in our 'cheesy flatten', so make some worst-case // sapce for that too u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7; pj.structural_indexes = new u32[max_structures]; @@ -404,6 +453,7 @@ int main(int argc, char * argv[]) { std::chrono::duration secs = end - start; res[i] = secs.count(); } + colorfuldisplay(pj, p.first); double min_result = *min_element(res.begin(), res.end()); cout << "Min: " << min_result << " bytes read: " << p.second << " Gigabytes/second: " << (p.second) / (min_result * 1000000000.0) << "\n"; return 0; From b55e8c01a3004e4add19e0207e3510d88251404f Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 6 Apr 2018 11:22:49 -0400 Subject: [PATCH 3/9] Making my adversarial example a bit harder. 
--- jsonexamples/adversarial.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jsonexamples/adversarial.json b/jsonexamples/adversarial.json index 74d6a8d2..70209aa8 100644 --- a/jsonexamples/adversarial.json +++ b/jsonexamples/adversarial.json @@ -1,9 +1,9 @@ { "\"Name rue": [ - 116, + [ 116, "\"", 234, "true", - false + false ] ] } From 980f69dc6760077ddfdf8f775cdbd383b068ceb7 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 6 Apr 2018 17:14:04 -0400 Subject: [PATCH 4/9] New benchmark --- scalarvssimd/Makefile | 6 + scalarvssimd/avxprocessing.h | 49 ++++---- scalarvssimd/bench.cpp | 54 +++++++++ scalarvssimd/benchmark.h | 196 ++++++++++++++++++++++++++++++++ scalarvssimd/common_defs.h | 2 +- scalarvssimd/demo.cpp | 0 scalarvssimd/jsonstruct.h | 67 +++++++++++ scalarvssimd/scalarprocessing.h | 49 ++++++-- 8 files changed, 395 insertions(+), 28 deletions(-) create mode 100644 scalarvssimd/Makefile create mode 100644 scalarvssimd/bench.cpp create mode 100644 scalarvssimd/benchmark.h delete mode 100644 scalarvssimd/demo.cpp diff --git a/scalarvssimd/Makefile b/scalarvssimd/Makefile new file mode 100644 index 00000000..07d8e79a --- /dev/null +++ b/scalarvssimd/Makefile @@ -0,0 +1,6 @@ +HEADERS:=avxprocessing.h benchmark.h common_defs.h jsonstruct.h scalarprocessing.h util.h +bench: bench.cpp $(HEADERS) + $(CXX) -O3 -o $@ bench.cpp -march=native -lm -Wall -Wextra + +clean: + rm -f bench diff --git a/scalarvssimd/avxprocessing.h b/scalarvssimd/avxprocessing.h index 924903fc..08ff7ff8 100644 --- a/scalarvssimd/avxprocessing.h +++ b/scalarvssimd/avxprocessing.h @@ -18,7 +18,7 @@ using namespace std; // a straightforward comparison of a mask against input. 5 uops; would be cheaper in AVX512. 
-really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask) { +static inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask) { m256 cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask); u64 res_0 = (u32)_mm256_movemask_epi8(cmp_res_0); m256 cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask); @@ -26,7 +26,7 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask return res_0 | (res_1 << 32); } -never_inline bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) { +static bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) { // Useful constant masks const u64 even_bits = 0x5555555555555555ULL; const u64 odd_bits = ~even_bits; @@ -81,12 +81,10 @@ never_inline bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & u64 quote_bits = cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"')); quote_bits = quote_bits & ~odd_ends; - dumpbits(quote_bits, "quote_bits"); u64 quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0)); quote_mask ^= prev_iter_inside_quote; prev_iter_inside_quote = (u64)((s64)quote_mask>>63); - dumpbits(quote_mask, "quote_mask"); // How do we build up a user traversable data structure // first, do a 'shufti' to detect structural JSON characters @@ -184,17 +182,31 @@ const u32 ROOT_NODE = 1; // just transform the bitmask to a big list of 32-bit integers for now // that's all; the type of character the offset points to will // tell us exactly what we need to know. 
Naive but straightforward implementation -never_inline bool flatten_indexes(size_t len, ParsedJson & pj) { +static bool flatten_indexes(size_t len, ParsedJson & pj) { u32 base = NUM_RESERVED_NODES; u32 * base_ptr = pj.structural_indexes; base_ptr[DUMMY_NODE] = base_ptr[ROOT_NODE] = 0; // really shouldn't matter for (size_t idx = 0; idx < len; idx+=64) { u64 s = *(u64 *)(pj.structurals + idx/8); + u32 cnt = __builtin_popcountll(s); + u32 next_base = base + cnt; while (s) { - u32 si = (u32)idx + __builtin_ctzll(s); - base_ptr[base++] = si; - s &= s - 1ULL; + // spoil the suspense + u64 s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset + u64 s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset + + base_ptr[base+0] = (u32)idx + __builtin_ctzll(s); u64 s1 = s & (s - 1ULL); + base_ptr[base+1] = (u32)idx + __builtin_ctzll(s1); u64 s2 = s1 & (s1 - 1ULL); + base_ptr[base+2] = (u32)idx + __builtin_ctzll(s2); //u64 s3 = s2 & (s2 - 1ULL); + base_ptr[base+3] = (u32)idx + __builtin_ctzll(s3); u64 s4 = s3 & (s3 - 1ULL); + + base_ptr[base+4] = (u32)idx + __builtin_ctzll(s4); //u64 s5 = s4 & (s4 - 1ULL); + base_ptr[base+5] = (u32)idx + __builtin_ctzll(s5); u64 s6 = s5 & (s5 - 1ULL); + base_ptr[base+6] = (u32)idx + __builtin_ctzll(s6); u64 s7 = s6 & (s6 - 1ULL); + s = s7; + base += 7; } + base = next_base; } pj.n_structural_indexes = base; return true; @@ -202,7 +214,7 @@ never_inline bool flatten_indexes(size_t len, ParsedJson & pj) { // Parse our json given a big array of 32-bit integers telling us where // the interesting stuff is -bool avx_json_parse(const u8 * buf, UNUSED size_t len, ParsedJson & pj) { +static bool json_parse(const u8 * buf, UNUSED size_t len, ParsedJson & pj) { u32 last; // index of previous structure at this level or 0 if none u32 up; // index of structure that contains this one @@ -240,16 +252,13 @@ bool avx_json_parse(const u8 * buf, UNUSED size_t len, ParsedJson & pj) { nodes[n.prev].next = i; } dummy.next = DUMMY_NODE; // 
dummy.next is a sump for meaningless 'nexts', clear it -#ifdef DEBUG - for (u32 i = 0; i < pj.n_structural_indexes; i++) { - u32 idx = pj.structural_indexes[i]; - JsonNode & n = nodes[i]; - cout << "i: " << i; - cout << " n.up: " << n.up; - cout << " n.next: " << n.next; - cout << " n.prev: " << n.prev; - cout << " idx: " << idx << " buf[idx] " << buf[idx] << "\n"; - } -#endif return true; } + + +static bool avx_json_parse(const u8 * buf, size_t len, ParsedJson & pj) { + find_structural_bits(buf, len, pj); + flatten_indexes(len, pj); + json_parse(buf, len, pj); + return true; +} diff --git a/scalarvssimd/bench.cpp b/scalarvssimd/bench.cpp new file mode 100644 index 00000000..3cfa9530 --- /dev/null +++ b/scalarvssimd/bench.cpp @@ -0,0 +1,54 @@ +#include "jsonstruct.h" + +#include "scalarprocessing.h" +#include "avxprocessing.h" +#include "benchmark.h" +#include "util.h" + +//colorfuldisplay(ParsedJson & pj, const u8 * buf) +//BEST_TIME_NOCHECK(dividearray32(array, N), , repeat, N, timings,true); + + +int main(int argc, char * argv[]) { + if (argc < 2) { + cerr << "Usage: " << argv[0] << " \n"; + cerr << "Or " << argv[0] << " -v \n"; + exit(1); + } + bool verbose = false; + if (argc > 2) { + if(strcmp(argv[1],"-v")) verbose = true; + } + pair p = get_corpus(argv[argc - 1]); + ParsedJson pj; + std::cout << "Input has "<< p.second << " bytes."< +#include +#ifdef __x86_64__ + +const char *unitname = "cycles"; + +#define RDTSC_START(cycles) \ + do { \ + uint32_t cyc_high, cyc_low; \ + __asm volatile("cpuid\n" \ + "rdtsc\n" \ + "mov %%edx, %0\n" \ + "mov %%eax, %1" \ + : "=r"(cyc_high), "=r"(cyc_low) \ + : \ + : /* no read only */ \ + "%rax", "%rbx", "%rcx", "%rdx" /* clobbers */ \ + ); \ + (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \ + } while (0) + +#define RDTSC_STOP(cycles) \ + do { \ + uint32_t cyc_high, cyc_low; \ + __asm volatile("rdtscp\n" \ + "mov %%edx, %0\n" \ + "mov %%eax, %1\n" \ + "cpuid" \ + : "=r"(cyc_high), "=r"(cyc_low) \ + : /* no read only 
registers */ \ + : "%rax", "%rbx", "%rcx", "%rdx" /* clobbers */ \ + ); \ + (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \ + } while (0) + +#else +const char *unitname = " (clock units) "; + +#define RDTSC_START(cycles) \ + do { \ + cycles = clock(); \ + } while (0) + +#define RDTSC_STOP(cycles) \ + do { \ + cycles = clock(); \ + } while (0) +#endif + +static __attribute__((noinline)) uint64_t rdtsc_overhead_func(uint64_t dummy) { + return dummy; +} + +uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX; + +#define RDTSC_SET_OVERHEAD(test, repeat) \ + do { \ + uint64_t cycles_start, cycles_final, cycles_diff; \ + uint64_t min_diff = UINT64_MAX; \ + for (int i = 0; i < repeat; i++) { \ + __asm volatile("" ::: /* pretend to clobber */ "memory"); \ + RDTSC_START(cycles_start); \ + test; \ + RDTSC_STOP(cycles_final); \ + cycles_diff = (cycles_final - cycles_start); \ + if (cycles_diff < min_diff) \ + min_diff = cycles_diff; \ + } \ + global_rdtsc_overhead = min_diff; \ + } while (0) + +/* + * Prints the best number of operations per cycle where + * test is the function call, answer is the expected answer generated by + * test, repeat is the number of times we should repeat and size is the + * number of operations represented by test. 
+ */ +#define BEST_TIME(test, expected, pre, repeat, size, verbose) \ + do { \ + if (global_rdtsc_overhead == UINT64_MAX) { \ + RDTSC_SET_OVERHEAD(rdtsc_overhead_func(1), repeat); \ + } \ + if (verbose) \ + printf("%-60s\t: ", #test); \ + fflush(NULL); \ + uint64_t cycles_start, cycles_final, cycles_diff; \ + uint64_t min_diff = (uint64_t)-1; \ + uint64_t sum_diff = 0; \ + for (int i = 0; i < repeat; i++) { \ + pre; \ + __asm volatile("" ::: /* pretend to clobber */ "memory"); \ + RDTSC_START(cycles_start); \ + if (test != expected) { \ + printf("not expected (%d , %d )", (int)test, (int)expected); \ + break; \ + } \ + RDTSC_STOP(cycles_final); \ + cycles_diff = (cycles_final - cycles_start - global_rdtsc_overhead); \ + if (cycles_diff < min_diff) \ + min_diff = cycles_diff; \ + sum_diff += cycles_diff; \ + } \ + uint64_t S = size; \ + float cycle_per_op = (min_diff) / (double)S; \ + if (verbose) \ + printf(" %.3f %s per operation (best) ", cycle_per_op, unitname); \ + if (verbose) \ + printf("\t%.3f %s per operation (avg) ", avg_cycle_per_op, unitname); \ + if (verbose) \ + printf("\n"); \ + if (!verbose) \ + printf(" %.3f ", cycle_per_op); \ + fflush(NULL); \ + } while (0) + +// like BEST_TIME, but no check +#define BEST_TIME_NOCHECK(test, pre, repeat, size, verbose) \ + do { \ + if (global_rdtsc_overhead == UINT64_MAX) { \ + RDTSC_SET_OVERHEAD(rdtsc_overhead_func(1), repeat); \ + } \ + if (verbose) \ + printf("%-40s\t: ", #test); \ + fflush(NULL); \ + uint64_t cycles_start, cycles_final, cycles_diff; \ + uint64_t min_diff = (uint64_t)-1; \ + uint64_t sum_diff = 0; \ + for (int i = 0; i < repeat; i++) { \ + pre; \ + __asm volatile("" ::: /* pretend to clobber */ "memory"); \ + RDTSC_START(cycles_start); \ + test; \ + RDTSC_STOP(cycles_final); \ + cycles_diff = (cycles_final - cycles_start - global_rdtsc_overhead); \ + if (cycles_diff < min_diff) \ + min_diff = cycles_diff; \ + sum_diff += cycles_diff; \ + } \ + uint64_t S = size; \ + float cycle_per_op = 
(min_diff) / (double)S; \ + float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \ + if (verbose) \ + printf(" %.3f %s per operation (best) ", cycle_per_op, unitname); \ + if (verbose) \ + printf(" %.3f %s per operation (avg) ", avg_cycle_per_op, unitname); \ + if (verbose) \ + printf("\n"); \ + if (!verbose) \ + printf(" %.3f ", cycle_per_op); \ + fflush(NULL); \ + } while (0) + +// like BEST_TIME except that we run a function to check the result +#define BEST_TIME_CHECK(test, check, pre, repeat, size, verbose) \ + do { \ + if (global_rdtsc_overhead == UINT64_MAX) { \ + RDTSC_SET_OVERHEAD(rdtsc_overhead_func(1), repeat); \ + } \ + if (verbose) \ + printf("%-60s\t: ", #test); \ + fflush(NULL); \ + uint64_t cycles_start, cycles_final, cycles_diff; \ + uint64_t min_diff = (uint64_t)-1; \ + uint64_t sum_diff = 0; \ + for (int i = 0; i < repeat; i++) { \ + pre; \ + __asm volatile("" ::: /* pretend to clobber */ "memory"); \ + RDTSC_START(cycles_start); \ + test; \ + RDTSC_STOP(cycles_final); \ + if (!check) { \ + printf("error"); \ + break; \ + } \ + cycles_diff = (cycles_final - cycles_start - global_rdtsc_overhead); \ + if (cycles_diff < min_diff) \ + min_diff = cycles_diff; \ + sum_diff += cycles_diff; \ + } \ + uint64_t S = size; \ + float cycle_per_op = (min_diff) / (double)S; \ + float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \ + if (verbose) \ + printf(" %.3f cycles per operation (best) ", cycle_per_op); \ + if (verbose) \ + printf("\t%.3f cycles per operation (avg) ", avg_cycle_per_op); \ + if (verbose) \ + printf("\n"); \ + if (!verbose) \ + printf(" %.3f ", cycle_per_op); \ + fflush(NULL); \ + } while (0) + +#endif diff --git a/scalarvssimd/common_defs.h b/scalarvssimd/common_defs.h index 72730cca..301b0cb0 100644 --- a/scalarvssimd/common_defs.h +++ b/scalarvssimd/common_defs.h @@ -1,5 +1,5 @@ #pragma once - +#include typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; diff --git a/scalarvssimd/demo.cpp 
b/scalarvssimd/demo.cpp deleted file mode 100644 index e69de29b..00000000 diff --git a/scalarvssimd/jsonstruct.h b/scalarvssimd/jsonstruct.h index d766924e..41c5f5db 100644 --- a/scalarvssimd/jsonstruct.h +++ b/scalarvssimd/jsonstruct.h @@ -1,5 +1,6 @@ #pragma once +#include "common_defs.h" struct JsonNode { u32 up; @@ -13,3 +14,69 @@ struct ParsedJson { u32 * structural_indexes; JsonNode * nodes; }; + +#include +#include +#include + +// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal +namespace Color { + enum Code { + FG_DEFAULT = 39, FG_BLACK = 30, FG_RED = 31, FG_GREEN = 32, + FG_YELLOW = 33, FG_BLUE = 34, FG_MAGENTA = 35, FG_CYAN = 36, + FG_LIGHT_GRAY = 37, FG_DARK_GRAY = 90, FG_LIGHT_RED = 91, + FG_LIGHT_GREEN = 92, FG_LIGHT_YELLOW = 93, FG_LIGHT_BLUE = 94, + FG_LIGHT_MAGENTA = 95, FG_LIGHT_CYAN = 96, FG_WHITE = 97, + BG_RED = 41, BG_GREEN = 42, BG_BLUE = 44, BG_DEFAULT = 49 + }; + class Modifier { + Code code; + public: + Modifier(Code pCode) : code(pCode) {} + friend std::ostream& + operator<<(std::ostream& os, const Modifier& mod) { + return os << "\033[" << mod.code << "m"; + } + }; +} + +void colorfuldisplay(ParsedJson & pj, const u8 * buf) { + Color::Modifier greenfg(Color::FG_GREEN); + Color::Modifier yellowfg(Color::FG_YELLOW); + Color::Modifier deffg(Color::FG_DEFAULT); + size_t i = 0; + // skip initial fluff + while((i+1< pj.n_structural_indexes) && (pj.structural_indexes[i]==pj.structural_indexes[i+1])){ + i++; + } + for (; i < pj.n_structural_indexes; i++) { + u32 idx = pj.structural_indexes[i]; + u8 c = buf[idx]; + if (((c & 0xdf) == 0x5b)) { // meaning 7b or 5b, { or [ + std::cout << greenfg << buf[idx] << deffg; + } else if (((c & 0xdf) == 0x5d)) { // meaning 7d or 5d, } or ] + std::cout << greenfg << buf[idx] << deffg; + } else { + std::cout << yellowfg << buf[idx] << deffg; + } + if(i + 1 < pj.n_structural_indexes) { + u32 nextidx = pj.structural_indexes[i + 1]; + for(u32 pos = idx + 1 ; pos < 
nextidx; pos++) { + std::cout << buf[pos]; + } + } + } + std::cout << std::endl; +} + +void debugdisplay(ParsedJson & pj, const u8 * buf) { + for (u32 i = 0; i < pj.n_structural_indexes; i++) { + u32 idx = pj.structural_indexes[i]; + JsonNode & n = pj.nodes[i]; + std::cout << "i: " << i; + std::cout << " n.up: " << n.up; + std::cout << " n.next: " << n.next; + std::cout << " n.prev: " << n.prev; + std::cout << " idx: " << idx << " buf[idx] " << buf[idx] << std::endl; + } +} diff --git a/scalarvssimd/scalarprocessing.h b/scalarvssimd/scalarprocessing.h index a1701b27..85a45440 100644 --- a/scalarvssimd/scalarprocessing.h +++ b/scalarvssimd/scalarprocessing.h @@ -1,44 +1,79 @@ #include "common_defs.h" #include "jsonstruct.h" +bool is_valid_escape(char c) { + return (c == '"') || (c == '\\') || (c == '/') || (c == 'b') || (c == 'f') || (c == 'n') || (c == 'r') || (c == 't') || (c == 'u'); +} + bool scalar_json_parse(const u8 * buf, size_t len, ParsedJson & pj) { // this is a naive attempt at this point // it will probably be subject to failures given adversarial inputs size_t pos = 0; size_t last = 0; size_t up = 0; + + const u32 DUMMY_NODE = 0; + const u32 ROOT_NODE = 1; + pj.structural_indexes[DUMMY_NODE] = 0; + pj.structural_indexes[ROOT_NODE] = 0; + JsonNode & dummy = pj.nodes[DUMMY_NODE]; + JsonNode & root = pj.nodes[ROOT_NODE]; + dummy.prev = dummy.up = DUMMY_NODE; + dummy.next = 0; + root.prev = DUMMY_NODE; + root.up = ROOT_NODE; + root.next = 0; + + last = up = ROOT_NODE; + + pos = 2; for(size_t i = 0; i < len; i++) { JsonNode & n = pj.nodes[pos]; - switch buf[i] { + switch (buf[i]) { case '[': case '{': + pj.structural_indexes[pos] = i; n.prev = last; + pj.nodes[last].next = pos;// two-way linked list n.up = up; - up = pos; + up = pos;// new possible up last = 0; pos += 1; + break; case ']': case '}': + pj.structural_indexes[pos] = i; n.prev = up; + n.next = 0;// necessary? 
+ pj.nodes[up].next = pos;// two-way linked list n.up = pj.nodes[up].up; up = pj.nodes[up].up; - last = pos; + last = pos;// potential previous pos += 1; break; - + case '"': case ':': case ',': + pj.structural_indexes[pos] = i; n.prev = last; + n.next = 0;// necessary + pj.nodes[last].next = pos;// two-way linked list n.up = up; - last = pos; + last = pos;// potential previous pos += 1; break; + case '\\': + if(i == len - 1) return false; + if(!is_valid_escape(buf[i+1])) return false; + i = i + 1; // skip valid escape default: // nothing + break; } - n.next = 0; - nodes[n.prev].next = pos; + } pj.n_structural_indexes = pos; + dummy.next = DUMMY_NODE; // dummy.next is a sump for meaningless 'nexts', clear it + return true; } From 121f024be02046af2a5a3b274d7d5ce36d6f43dc Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 6 Apr 2018 17:29:19 -0400 Subject: [PATCH 5/9] Cleaning. --- scalarvssimd/Makefile | 6 +++--- scalarvssimd/README.md | 3 +++ scalarvssimd/{ => benchmarks}/bench.cpp | 11 +++++++++-- scalarvssimd/{ => include}/avxprocessing.h | 0 scalarvssimd/{ => include}/benchmark.h | 4 ++-- scalarvssimd/{ => include}/common_defs.h | 0 scalarvssimd/{ => include}/jsonstruct.h | 0 scalarvssimd/{ => include}/scalarprocessing.h | 0 scalarvssimd/{ => include}/util.h | 0 scalarvssimd/run.sh | 11 +++++++++++ 10 files changed, 28 insertions(+), 7 deletions(-) create mode 100644 scalarvssimd/README.md rename scalarvssimd/{ => benchmarks}/bench.cpp (85%) rename scalarvssimd/{ => include}/avxprocessing.h (100%) rename scalarvssimd/{ => include}/benchmark.h (98%) rename scalarvssimd/{ => include}/common_defs.h (100%) rename scalarvssimd/{ => include}/jsonstruct.h (100%) rename scalarvssimd/{ => include}/scalarprocessing.h (100%) rename scalarvssimd/{ => include}/util.h (100%) create mode 100755 scalarvssimd/run.sh diff --git a/scalarvssimd/Makefile b/scalarvssimd/Makefile index 07d8e79a..c3f99ea0 100644 --- a/scalarvssimd/Makefile +++ b/scalarvssimd/Makefile @@ -1,6 +1,6 
@@ -HEADERS:=avxprocessing.h benchmark.h common_defs.h jsonstruct.h scalarprocessing.h util.h -bench: bench.cpp $(HEADERS) - $(CXX) -O3 -o $@ bench.cpp -march=native -lm -Wall -Wextra +HEADERS:=include/avxprocessing.h include/benchmark.h include/common_defs.h include/jsonstruct.h include/scalarprocessing.h include/util.h +bench: benchmarks/bench.cpp $(HEADERS) + $(CXX) -std=c++11 -O3 -o $@ benchmarks/bench.cpp -Iinclude -march=native -lm -Wall -Wextra clean: rm -f bench diff --git a/scalarvssimd/README.md b/scalarvssimd/README.md new file mode 100644 index 00000000..6068a149 --- /dev/null +++ b/scalarvssimd/README.md @@ -0,0 +1,3 @@ +``` +./run.sh +``` diff --git a/scalarvssimd/bench.cpp b/scalarvssimd/benchmarks/bench.cpp similarity index 85% rename from scalarvssimd/bench.cpp rename to scalarvssimd/benchmarks/bench.cpp index 3cfa9530..5624b4b7 100644 --- a/scalarvssimd/bench.cpp +++ b/scalarvssimd/benchmarks/bench.cpp @@ -21,7 +21,14 @@ int main(int argc, char * argv[]) { } pair p = get_corpus(argv[argc - 1]); ParsedJson pj; - std::cout << "Input has "<< p.second << " bytes."< 1024 * 1024) + std::cout << p.second / (1024*1024) << " MB "; + else if (p.second > 1024) + std::cout << p.second / 1024 << " KB "; + else + std::cout << p.second << " B "; + std::cout << std::endl; if (posix_memalign( (void **)&pj.structurals, 8, ROUNDUP_N(p.second, 64)/8)) { throw "Allocation failed"; @@ -46,7 +53,7 @@ int main(int argc, char * argv[]) { colorfuldisplay(pj, p.first); debugdisplay(pj,p.first); } - int repeat = 5; + int repeat = 10; int volume = p.second; BEST_TIME_NOCHECK(avx_json_parse(p.first, p.second, pj), , repeat, volume, true); BEST_TIME_NOCHECK(scalar_json_parse(p.first, p.second, pj), , repeat, volume, true); diff --git a/scalarvssimd/avxprocessing.h b/scalarvssimd/include/avxprocessing.h similarity index 100% rename from scalarvssimd/avxprocessing.h rename to scalarvssimd/include/avxprocessing.h diff --git a/scalarvssimd/benchmark.h 
b/scalarvssimd/include/benchmark.h similarity index 98% rename from scalarvssimd/benchmark.h rename to scalarvssimd/include/benchmark.h index 39b84509..59209d97 100644 --- a/scalarvssimd/benchmark.h +++ b/scalarvssimd/include/benchmark.h @@ -142,9 +142,9 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX; float cycle_per_op = (min_diff) / (double)S; \ float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \ if (verbose) \ - printf(" %.3f %s per operation (best) ", cycle_per_op, unitname); \ + printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \ if (verbose) \ - printf(" %.3f %s per operation (avg) ", avg_cycle_per_op, unitname); \ + printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \ if (verbose) \ printf("\n"); \ if (!verbose) \ diff --git a/scalarvssimd/common_defs.h b/scalarvssimd/include/common_defs.h similarity index 100% rename from scalarvssimd/common_defs.h rename to scalarvssimd/include/common_defs.h diff --git a/scalarvssimd/jsonstruct.h b/scalarvssimd/include/jsonstruct.h similarity index 100% rename from scalarvssimd/jsonstruct.h rename to scalarvssimd/include/jsonstruct.h diff --git a/scalarvssimd/scalarprocessing.h b/scalarvssimd/include/scalarprocessing.h similarity index 100% rename from scalarvssimd/scalarprocessing.h rename to scalarvssimd/include/scalarprocessing.h diff --git a/scalarvssimd/util.h b/scalarvssimd/include/util.h similarity index 100% rename from scalarvssimd/util.h rename to scalarvssimd/include/util.h diff --git a/scalarvssimd/run.sh b/scalarvssimd/run.sh new file mode 100755 index 00000000..3caf1580 --- /dev/null +++ b/scalarvssimd/run.sh @@ -0,0 +1,11 @@ +#!/bin/bash +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +cd $SCRIPTPATH +make bench +echo +for i in $SCRIPTPATH/../jsonexamples/*.json; do + [ -f "$i" ] || break + echo $i + $SCRIPTPATH/bench $i + echo +done From 938c63db67e576cd31a0073bbad89f5c5f9de8ff Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 6 Apr 2018 
17:30:42 -0400 Subject: [PATCH 6/9] Added a note --- scalarvssimd/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scalarvssimd/run.sh b/scalarvssimd/run.sh index 3caf1580..91e80ec6 100755 --- a/scalarvssimd/run.sh +++ b/scalarvssimd/run.sh @@ -1,4 +1,5 @@ #!/bin/bash +echo "Note: the SIMD parser does a bit more work." SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" cd $SCRIPTPATH make bench From 81385d85aa9b1641dcda7a16427e9ae730ff61d8 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 10 Apr 2018 15:23:06 -0400 Subject: [PATCH 7/9] json is not javascript. --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index ddf9f4d8..7136077f 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,11 @@ Inspiring links: - The JSON spec defines what a JSON parser is: > A JSON parser transforms a JSON text into another representation. A JSON parser MUST accept all texts that conform to the JSON grammar. A JSON parser MAY accept non-JSON forms or extensions. An implementation may set limits on the size of texts that it accepts. An implementation may set limits on the maximum depth of nesting. An implementation may set limits on the range and precision of numbers. An implementation may set limits on the length and character contents of strings." + +- JSON is not JavaScript: + +> All JSON is Javascript but NOT all Javascript is JSON. So {property:1} is invalid because property does not have double quotes around it. {'property':1} is also invalid, because it's single quoted while the only thing that can placate the JSON specification is double quoting. JSON is even fussy enough that {"property":.1} is invalid too, because you should have of course written {"property":0.1}. Also, don't even think about having comments or semicolons, you guessed it: they're invalid. 
(credit:https://github.com/elzr/vim-json) + - The structural characters are: From c4df1e27f4f8a3b02bb1e1fac273f0868a2565bf Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 10 Apr 2018 16:07:31 -0400 Subject: [PATCH 8/9] some remarks --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7136077f..861f2b5e 100644 --- a/README.md +++ b/README.md @@ -139,7 +139,7 @@ Inspiring links: - Values must be one of false / null / true / object / array / number / string - - A string begins and ends with quotation marks. All Unicode characters may be placed within the quotation marks, except for the characters that must be escaped: quotation mark, reverse solidus, and the control characters (U+0000 through U+001F). [Decoding UTF-8 is fun](https://github.com/skeeto/branchless-utf8/blob/master/utf8.h). + - A string begins and ends with quotation marks. All Unicode characters may be placed within the quotation marks, except for the characters that must be escaped: quotation mark, reverse solidus, and the control characters (U+0000 through U+001F). We can probably safely assume that strings are in UTF-8. [Decoding UTF-8 is fun](https://github.com/skeeto/branchless-utf8/blob/master/utf8.h). However, any character can be escaped in a JSON string, and escaping them might be required? Well, maybe you can quickly check whether a string needs escaping. - Regarding strings, Geoff wrote: > For example, in Stage 2 ("string detection") we could validate that the only place we saw backslashes was in places we consider "inside strings".
From 46d55fa6ce49749c9d0d15e3561945440d6536f4 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 13 Apr 2018 19:52:02 -0400 Subject: [PATCH 9/9] pseudo-structural --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 861f2b5e..eed8fdb4 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,19 @@ prev structural element at the same level containing structural element ("up"). +### Pseudo-structural elements + +A character is pseudo-structural if and only if: + +1. Not enclosed in quotes, AND +2. Is a non-whitespace character, AND +3. Its preceding character is either: +(a) a structural character, OR +(b) whitespace. + +This helps as we redefine some new characters as pseudo-structural such as the characters 1, 1, G, n in the following: + +> { "foo" : 1.5, "bar" : 1.5 GEOFF_IS_A_DUMMY bla bla , "baz", null } ## Remarks on the code