Trying a detailed analysis.
This commit is contained in:
parent
7d37dd5dea
commit
20133963bc
21
Makefile
21
Makefile
|
@ -25,6 +25,7 @@ endif
|
|||
MAINEXECUTABLES=parse minify json2json
|
||||
TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck
|
||||
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile
|
||||
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
|
||||
|
||||
HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_flatten.h include/simdjson/stage34_unified.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
|
||||
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp
|
||||
|
@ -85,6 +86,16 @@ $(UJSON4C_INCLUDE):
|
|||
parse: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
|
||||
|
||||
parse_noutf8validation: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o parse_noutf8validation -DSIMDJSON_SKIPUTF8VALIDATION $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
|
||||
|
||||
parse_nonumberparsing: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o parse_nonumberparsing -DSIMDJSON_SKIPNUMBERPARSING $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
|
||||
|
||||
parse_nostringparsing: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o parse_nostringparsing -DSIMDJSON_SKIPSTRINGPARSING $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
|
||||
|
||||
|
||||
jsoncheck:tests/jsoncheck.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o jsoncheck $(LIBFILES) tests/jsoncheck.cpp -I. $(LIBFLAGS)
|
||||
|
||||
|
@ -115,11 +126,9 @@ parseandstatcompetition: benchmark/parseandstatcompetition.cpp $(HEADERS) $(LIBF
|
|||
distinctuseridcompetition: benchmark/distinctuseridcompetition.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o distinctuseridcompetition $(LIBFILES) benchmark/distinctuseridcompetition.cpp -I. $(LIBFLAGS) $(COREDEPSINCLUDE)
|
||||
|
||||
|
||||
parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES) #$(EXTRAOBJECTS)
|
||||
parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp -I. $(LIBFLAGS) $(COREDEPSINCLUDE)
|
||||
#$(EXTRADEPSINCLUDE)
|
||||
#$(EXTRAOBJECTS)
|
||||
|
||||
|
||||
allparserscheckfile: tests/allparserscheckfile.cpp $(HEADERS) $(LIBFILES) $(EXTRAOBJECTS)
|
||||
$(CXX) $(CXXFLAGS) -o allparserscheckfile $(LIBFILES) tests/allparserscheckfile.cpp $(EXTRAOBJECTS) -I. $(LIBFLAGS) $(COREDEPSINCLUDE) $(EXTRADEPSINCLUDE)
|
||||
|
@ -132,7 +141,7 @@ cppcheck:
|
|||
|
||||
|
||||
clean:
|
||||
rm -f $(EXTRAOBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES)
|
||||
rm -f $(EXTRAOBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES) $(SUPPLEMENTARYEXECUTABLES)
|
||||
|
||||
cleandist:
|
||||
rm -f $(EXTRAOBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES)
|
||||
rm -f $(EXTRAOBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES) $(SUPPLEMENTARYEXECUTABLES)
|
||||
|
|
|
@ -340,6 +340,10 @@ static really_inline bool parse_number(const u8 *const buf,
|
|||
ParsedJson &pj,
|
||||
const u32 offset,
|
||||
bool found_minus) {
|
||||
#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes useful to skip parsing
|
||||
pj.write_tape_s64(0); // always write zero
|
||||
return true; // always succeeds
|
||||
#else
|
||||
const char *p = (const char *)(buf + offset);
|
||||
bool negative = false;
|
||||
if (found_minus) {
|
||||
|
@ -493,4 +497,5 @@ static really_inline bool parse_number(const u8 *const buf,
|
|||
#endif
|
||||
}
|
||||
return is_structural_or_whitespace(*p);
|
||||
#endif // SIMDJSON_SKIPNUMBERPARSING
|
||||
}
|
||||
|
|
|
@ -61,7 +61,10 @@ really_inline bool handle_unicode_codepoint(const u8 **src_ptr, u8 **dst_ptr) {
|
|||
WARN_UNUSED
|
||||
really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
|
||||
ParsedJson &pj, UNUSED const u32 depth, u32 offset) {
|
||||
using namespace std;
|
||||
#ifdef SIMDJSON_SKIPSTRINGPARSING // for performance analysis, it is sometimes useful to skip parsing
|
||||
pj.write_tape(0, '"');// don't bother with the string parsing at all
|
||||
return true; // always succeeds
|
||||
#else
|
||||
const u8 *src = &buf[offset + 1]; // we know that buf at offset is a "
|
||||
u8 *dst = pj.current_string_buf_loc;
|
||||
#ifdef JSON_TEST_STRINGS // for unit testing
|
||||
|
@ -195,6 +198,7 @@ really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
|
|||
}
|
||||
// can't be reached
|
||||
return true;
|
||||
#endif // SIMDJSON_SKIPSTRINGPARSING
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -13,15 +13,35 @@ if [ "$os" = "Linux" ]; then
|
|||
echo "You are using linux."
|
||||
echo "We are going to just parse using simdjson, and collect perf stats."
|
||||
|
||||
make parse
|
||||
make parse parse_noutf8validation parse_nonumberparsing parse_nostringparsing
|
||||
myfile=$plotdirectory"/parselinuxtable.txt"
|
||||
echo $myfile
|
||||
echo "" > $myfile
|
||||
|
||||
myfile_noutf8validation=$plotdirectory"/parselinuxtable_noutf8validation.txt"
|
||||
echo $myfile_noutf8validation
|
||||
echo "" > $myfile_noutf8validation
|
||||
|
||||
myfile_nonumberparsing=$plotdirectory"/parselinuxtable_nonumberparsing.txt" # dedicated variable: must not overwrite $myfile (main table), and is read by the loop, paste and rm below
|
||||
echo $myfile_nonumberparsing
|
||||
echo "" > $myfile_nonumberparsing
|
||||
|
||||
myfile_nostringparsing=$plotdirectory"/parselinuxtable_nostringparsing.txt" # dedicated variable: must not overwrite $myfile (main table), and is read by the loop, paste and rm below
|
||||
echo $myfile_nostringparsing
|
||||
echo "" > $myfile_nostringparsing
|
||||
|
||||
|
||||
for i in $SCRIPTPATH/../jsonexamples/*.json; do
|
||||
[ -f "$i" ] || break
|
||||
echo $i
|
||||
$SCRIPTPATH/../parse -t "$i" >> "$myfile"
|
||||
$SCRIPTPATH/../parse_noutf8validation -t "$i" >> "$myfile_noutf8validation"
|
||||
$SCRIPTPATH/../parse_nonumberparsing -t "$i" >> "$myfile_nonumberparsing"
|
||||
$SCRIPTPATH/../parse_nostringparsing -t "$i" >> "$myfile_nostringparsing"
|
||||
done
|
||||
paste $myfile $myfile_noutf8validation $myfile_nonumberparsing $myfile_nostringparsing > $myfile.tmp
|
||||
mv $myfile.tmp $myfile
|
||||
rm $myfile_noutf8validation $myfile_nonumberparsing $myfile_nostringparsing
|
||||
gnuplot -e "filename='$myfile';name='$plotdirectory/stackedperf.pdf'" $SCRIPTPATH/stackbar.gnuplot
|
||||
fi
|
||||
|
||||
|
|
|
@ -21,7 +21,8 @@ set ytics nomirror
|
|||
|
||||
set yrange [0:]
|
||||
|
||||
set key right
|
||||
#set key right
|
||||
set key outside
|
||||
set style data histograms
|
||||
set style histogram rowstacked
|
||||
set xtic rotate by 300 scale 1
|
||||
|
@ -29,6 +30,20 @@ set xtic rotate by 300 scale 1
|
|||
set style line 1 lt rgb "#A00000" lw 1 pt 1 ps 1
|
||||
set style line 2 lt rgb "#00A000" lw 1 pt 1 ps 1
|
||||
set style line 3 lt rgb "#5060D0" lw 1 pt 1 ps 1
|
||||
set style line 4 lt rgb "#FF1493" lw 1 pt 1 ps 1
|
||||
set style line 4 lt rgb "red" lw 1 pt 1 ps 1
|
||||
set style line 5 lt rgb "#808000" lw 1 pt 1 ps 1
|
||||
set style line 6 lt rgb "#00008B" lw 1 pt 1 ps 1
|
||||
set style line 7 lt rgb "black" lw 1 pt 1 ps 1
|
||||
set style line 8 lt rgb "blue" lw 1 pt 1 ps 1
|
||||
set style line 9 lt rgb "violet" lw 1 pt 1 ps 1
|
||||
|
||||
plot filename using 3 t "stage 1" ls 2, '' using 4 t "stage 2" ls 3, '' using 5:xtic(1) t "stage 3" ls 1
|
||||
# plot filename using 3 t "stage 1" ls 2, '' using 4 t "stage 2" ls 3, '' using 5:xtic(1) t "stage 3" ls 1
|
||||
plot filename using 8 t "stage 1 without utf8 validation" ls 1, '' using ($3-$8) t "utf8 validation (stage 1)" ls 2, '' using 4 t "stage 2" ls 3, '' using ($20 + $15 - $5) t "stage 3 (no number or string)" ls 4, '' using ($5 - $20) t "string parsing (stage 3)" ls 5, '' using ($5 - $15):xtic(1) t "number parsing (stage 3)" ls 6
|
||||
|
||||
|
||||
|
||||
# 1, 2 mem, 3 st1, 4 st2, 5 st3
|
||||
# 6, 7 mem, 8 st1, 9 st2, 10 st3 // noutf8
|
||||
# 11, 12 mem, 13 st1, 14 st2, 15 st3 // nonumber
|
||||
# 16, 17 mem, 18 st1, 19 st2, 20 st3 // nostring
|
||||
# string: $5 - $20 , number $5 - $15, no string no number $5 - ($5 - $20) - ($5 - $15) = $20 + $15 - $5
|
||||
|
|
|
@ -10,11 +10,14 @@
|
|||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
#define UTF8VALIDATE
|
||||
#ifndef SIMDJSON_SKIPUTF8VALIDATION
|
||||
#define SIMDJSON_UTF8VALIDATE
|
||||
#endif
|
||||
|
||||
// It seems that many parsers do UTF-8 validation.
|
||||
// RapidJSON does not do it by default, but a flag
|
||||
// allows it.
|
||||
#ifdef UTF8VALIDATE
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
#include "simdjson/simdutf8check.h"
|
||||
#endif
|
||||
using namespace std;
|
||||
|
@ -37,7 +40,7 @@ WARN_UNUSED
|
|||
cerr << "Your ParsedJson object only supports documents up to "<< pj.bytecapacity << " bytes but you are trying to process " << len << " bytes\n";
|
||||
return false;
|
||||
}
|
||||
#ifdef UTF8VALIDATE
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
__m256i has_error = _mm256_setzero_si256();
|
||||
struct avx_processed_utf_bytes previous = {
|
||||
.rawbytes = _mm256_setzero_si256(),
|
||||
|
@ -78,7 +81,7 @@ WARN_UNUSED
|
|||
#endif
|
||||
m256 input_lo = _mm256_loadu_si256((const m256 *)(buf + idx + 0));
|
||||
m256 input_hi = _mm256_loadu_si256((const m256 *)(buf + idx + 32));
|
||||
#ifdef UTF8VALIDATE
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
m256 highbit = _mm256_set1_epi8(0x80);
|
||||
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
|
||||
// it is ascii, we just check continuation
|
||||
|
@ -261,7 +264,7 @@ WARN_UNUSED
|
|||
memcpy(tmpbuf,buf+idx,len - idx);
|
||||
m256 input_lo = _mm256_loadu_si256((const m256 *)(tmpbuf + 0));
|
||||
m256 input_hi = _mm256_loadu_si256((const m256 *)(tmpbuf + 32));
|
||||
#ifdef UTF8VALIDATE
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
m256 highbit = _mm256_set1_epi8(0x80);
|
||||
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
|
||||
// it is ascii, we just check continuation
|
||||
|
@ -402,7 +405,7 @@ WARN_UNUSED
|
|||
structurals &= ~(quote_bits & ~quote_mask);
|
||||
*(u64 *)(pj.structurals + idx / 8) = structurals;
|
||||
}
|
||||
#ifdef UTF8VALIDATE
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
return _mm256_testz_si256(has_error, has_error);
|
||||
#else
|
||||
return true;
|
||||
|
|
Loading…
Reference in New Issue