Trying a detailed analysis.

This commit is contained in:
Daniel Lemire 2018-12-19 21:23:37 -05:00
parent 7d37dd5dea
commit 20133963bc
6 changed files with 73 additions and 17 deletions

View File

@ -25,6 +25,7 @@ endif
MAINEXECUTABLES=parse minify json2json
TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_flatten.h include/simdjson/stage34_unified.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp
@ -85,6 +86,16 @@ $(UJSON4C_INCLUDE):
parse: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
# Instrumented variants of the `parse` benchmark, used for performance analysis.
# Each one compiles the same sources as `parse` with one extra -D switch that
# disables a single piece of work, so its cost can be estimated by comparing
# timings against the regular `parse` binary.

# SIMDJSON_SKIPUTF8VALIDATION: SIMDJSON_UTF8VALIDATE is left undefined, so the
# UTF-8 validation code guarded by it in stage 1 is compiled out.
parse_noutf8validation: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parse_noutf8validation -DSIMDJSON_SKIPUTF8VALIDATION $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
# SIMDJSON_SKIPNUMBERPARSING: parse_number writes a zero to the tape and
# reports success without looking at the input.
parse_nonumberparsing: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parse_nonumberparsing -DSIMDJSON_SKIPNUMBERPARSING $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
# SIMDJSON_SKIPSTRINGPARSING: parse_string writes a string tape entry and
# reports success without processing the string contents.
parse_nostringparsing: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parse_nostringparsing -DSIMDJSON_SKIPSTRINGPARSING $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
jsoncheck:tests/jsoncheck.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o jsoncheck $(LIBFILES) tests/jsoncheck.cpp -I. $(LIBFLAGS)
@ -115,11 +126,9 @@ parseandstatcompetition: benchmark/parseandstatcompetition.cpp $(HEADERS) $(LIBF
distinctuseridcompetition: benchmark/distinctuseridcompetition.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o distinctuseridcompetition $(LIBFILES) benchmark/distinctuseridcompetition.cpp -I. $(LIBFLAGS) $(COREDEPSINCLUDE)
parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES) #$(EXTRAOBJECTS)
parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp -I. $(LIBFLAGS) $(COREDEPSINCLUDE)
#$(EXTRADEPSINCLUDE)
#$(EXTRAOBJECTS)
allparserscheckfile: tests/allparserscheckfile.cpp $(HEADERS) $(LIBFILES) $(EXTRAOBJECTS)
$(CXX) $(CXXFLAGS) -o allparserscheckfile $(LIBFILES) tests/allparserscheckfile.cpp $(EXTRAOBJECTS) -I. $(LIBFLAGS) $(COREDEPSINCLUDE) $(EXTRADEPSINCLUDE)
@ -132,7 +141,7 @@ cppcheck:
clean:
rm -f $(EXTRAOBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES)
rm -f $(EXTRAOBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES) $(SUPPLEMENTARYEXECUTABLES)
cleandist:
rm -f $(EXTRAOBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES)
rm -f $(EXTRAOBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES) $(SUPPLEMENTARYEXECUTABLES)

View File

@ -340,6 +340,10 @@ static really_inline bool parse_number(const u8 *const buf,
ParsedJson &pj,
const u32 offset,
bool found_minus) {
#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes useful to skip parsing
pj.write_tape_s64(0); // always write zero
return true; // always succeeds
#else
const char *p = (const char *)(buf + offset);
bool negative = false;
if (found_minus) {
@ -493,4 +497,5 @@ static really_inline bool parse_number(const u8 *const buf,
#endif
}
return is_structural_or_whitespace(*p);
#endif // SIMDJSON_SKIPNUMBERPARSING
}

View File

@ -61,7 +61,10 @@ really_inline bool handle_unicode_codepoint(const u8 **src_ptr, u8 **dst_ptr) {
WARN_UNUSED
really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
ParsedJson &pj, UNUSED const u32 depth, u32 offset) {
using namespace std;
#ifdef SIMDJSON_SKIPSTRINGPARSING // for performance analysis, it is sometimes useful to skip parsing
pj.write_tape(0, '"');// don't bother with the string parsing at all
return true; // always succeeds
#else
const u8 *src = &buf[offset + 1]; // we know that buf at offset is a "
u8 *dst = pj.current_string_buf_loc;
#ifdef JSON_TEST_STRINGS // for unit testing
@ -195,6 +198,7 @@ really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
}
// can't be reached
return true;
#endif // SIMDJSON_SKIPSTRINGPARSING
}

View File

@ -13,15 +13,35 @@ if [ "$os" = "Linux" ]; then
echo "You are using linux."
echo "We are going to just parse using simdjson, and collect perf stats."
make parse
make parse parse_noutf8validation parse_nonumberparsing parse_nostringparsing
myfile=$plotdirectory"/parselinuxtable.txt"
echo $myfile
echo "" > $myfile
# One scratch table per instrumented build. Each table needs its own variable:
# reusing `myfile` here would leave the *_nonumberparsing / *_nostringparsing
# variables empty (breaking the redirections below and in the loop) and would
# make $myfile point at the wrong table when the results are pasted together.
myfile_noutf8validation=$plotdirectory"/parselinuxtable_noutf8validation.txt"
echo $myfile_noutf8validation
echo "" > $myfile_noutf8validation
myfile_nonumberparsing=$plotdirectory"/parselinuxtable_nonumberparsing.txt"
echo $myfile_nonumberparsing
echo "" > $myfile_nonumberparsing
myfile_nostringparsing=$plotdirectory"/parselinuxtable_nostringparsing.txt"
echo $myfile_nostringparsing
echo "" > $myfile_nostringparsing
for i in $SCRIPTPATH/../jsonexamples/*.json; do
[ -f "$i" ] || break
echo $i
$SCRIPTPATH/../parse -t "$i" >> "$myfile"
$SCRIPTPATH/../parse_noutf8validation -t "$i" >> "$myfile_noutf8validation"
$SCRIPTPATH/../parse_nonumberparsing -t "$i" >> "$myfile_nonumberparsing"
$SCRIPTPATH/../parse_nostringparsing -t "$i" >> "$myfile_nostringparsing"
done
paste $myfile $myfile_noutf8validation $myfile_nonumberparsing $myfile_nostringparsing > $myfile.tmp
mv $myfile.tmp $myfile
rm $myfile_noutf8validation $myfile_nonumberparsing $myfile_nostringparsing
gnuplot -e "filename='$myfile';name='$plotdirectory/stackedperf.pdf'" $SCRIPTPATH/stackbar.gnuplot
fi

View File

@ -21,7 +21,8 @@ set ytics nomirror
set yrange [0:]
set key right
#set key right
set key outside
set style data histograms
set style histogram rowstacked
set xtic rotate by 300 scale 1
@ -29,6 +30,20 @@ set xtic rotate by 300 scale 1
set style line 1 lt rgb "#A00000" lw 1 pt 1 ps 1
set style line 2 lt rgb "#00A000" lw 1 pt 1 ps 1
set style line 3 lt rgb "#5060D0" lw 1 pt 1 ps 1
set style line 4 lt rgb "#FF1493" lw 1 pt 1 ps 1
set style line 4 lt rgb "red" lw 1 pt 1 ps 1
set style line 5 lt rgb "#808000" lw 1 pt 1 ps 1
set style line 6 lt rgb "#00008B" lw 1 pt 1 ps 1
set style line 7 lt rgb "black" lw 1 pt 1 ps 1
set style line 8 lt rgb "blue" lw 1 pt 1 ps 1
set style line 9 lt rgb "violet" lw 1 pt 1 ps 1
plot filename using 3 t "stage 1" ls 2, '' using 4 t "stage 2" ls 3, '' using 5:xtic(1) t "stage 3" ls 1
# plot filename using 3 t "stage 1" ls 2, '' using 4 t "stage 2" ls 3, '' using 5:xtic(1) t "stage 3" ls 1
plot filename using 8 t "stage 1 without utf8 validation" ls 1, '' using ($3-$8) t "utf8 validation (stage 1)" ls 2, '' using 4 t "stage 2" ls 3, '' using ($20 + $15 - $5) t "stage 3 (no number or string)" ls 4, '' using ($5 - $20) t "string parsing (stage 3)" ls 5, '' using ($5 - $15):xtic(1) t "number parsing (stage 3)" ls 6
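# Column layout of the input table (five columns per build, four builds side by side):
#    1,  2 mem,  3 st1,  4 st2,  5 st3   // regular build
#    6,  7 mem,  8 st1,  9 st2, 10 st3   // noutf8   (no UTF-8 validation)
#   11, 12 mem, 13 st1, 14 st2, 15 st3   // nonumber (no number parsing)
#   16, 17 mem, 18 st1, 19 st2, 20 st3   // nostring (no string parsing)
# Derived stage 3 costs:
#   string parsing                    = $5 - $20
#   number parsing                    = $5 - $15
#   stage 3 without string or number  = $5 - ($5 - $20) - ($5 - $15) = $20 + $15 - $5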

View File

@ -10,11 +10,14 @@
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
#define UTF8VALIDATE
#ifndef SIMDJSON_SKIPUTF8VALIDATION
#define SIMDJSON_UTF8VALIDATE
#endif
// It seems that many parsers do UTF-8 validation.
// RapidJSON does not do it by default, but a flag
// allows it.
#ifdef UTF8VALIDATE
#ifdef SIMDJSON_UTF8VALIDATE
#include "simdjson/simdutf8check.h"
#endif
using namespace std;
@ -37,7 +40,7 @@ WARN_UNUSED
cerr << "Your ParsedJson object only supports documents up to "<< pj.bytecapacity << " bytes but you are trying to process " << len << " bytes\n";
return false;
}
#ifdef UTF8VALIDATE
#ifdef SIMDJSON_UTF8VALIDATE
__m256i has_error = _mm256_setzero_si256();
struct avx_processed_utf_bytes previous = {
.rawbytes = _mm256_setzero_si256(),
@ -78,7 +81,7 @@ WARN_UNUSED
#endif
m256 input_lo = _mm256_loadu_si256((const m256 *)(buf + idx + 0));
m256 input_hi = _mm256_loadu_si256((const m256 *)(buf + idx + 32));
#ifdef UTF8VALIDATE
#ifdef SIMDJSON_UTF8VALIDATE
m256 highbit = _mm256_set1_epi8(0x80);
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
// it is ascii, we just check continuation
@ -261,7 +264,7 @@ WARN_UNUSED
memcpy(tmpbuf,buf+idx,len - idx);
m256 input_lo = _mm256_loadu_si256((const m256 *)(tmpbuf + 0));
m256 input_hi = _mm256_loadu_si256((const m256 *)(tmpbuf + 32));
#ifdef UTF8VALIDATE
#ifdef SIMDJSON_UTF8VALIDATE
m256 highbit = _mm256_set1_epi8(0x80);
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
// it is ascii, we just check continuation
@ -402,7 +405,7 @@ WARN_UNUSED
structurals &= ~(quote_bits & ~quote_mask);
*(u64 *)(pj.structurals + idx / 8) = structurals;
}
#ifdef UTF8VALIDATE
#ifdef SIMDJSON_UTF8VALIDATE
return _mm256_testz_si256(has_error, has_error);
#else
return true;