add load times

This commit is contained in:
parrt 2017-03-17 13:37:01 -07:00
parent aa177cf415
commit b961c86143
1 changed files with 169 additions and 74 deletions

View File

@ -13,133 +13,228 @@ import java.util.Arrays;
import java.util.List;
/** Test how fast we can lex Java and some unicode graphemes using old and
* new unicode stream mechanism.
* new unicode stream mechanism. It also tests load time for ASCII
* and unicode code points beyond 0xFFFF.
*
* Sample output on OS X with 4 GHz Intel Core i7 (us == microseconds, 1/1000 of a millisecond)
* Sample output on OS X with 4 GHz Intel Core i7 (us == microseconds, 1/1000 of a millisecond):
*
legacy_java_ascii average time 336us over 1500 runs of 29038 symbols
legacy_java_ascii average time 828us over 1500 runs of 29038 symbols DFA cleared
legacy_java_utf8 average time 281us over 1500 runs of 29038 symbols
legacy_java_utf8 average time 815us over 1500 runs of 29038 symbols DFA cleared
new_java_utf8 average time 538us over 1500 runs of 29038 symbols
new_java_utf8 average time 1074us over 1500 runs of 29038 symbols DFA cleared
legacy_grapheme_utf8 average time 6812us over 500 runs of 6614 symbols from udhr_kor.txt
legacy_grapheme_utf8 average time 7046us over 500 runs of 6614 symbols from udhr_kor.txt DFA cleared
legacy_grapheme_utf8 average time 6190us over 500 runs of 13379 symbols from udhr_hin.txt
legacy_grapheme_utf8 average time 6379us over 500 runs of 13379 symbols from udhr_hin.txt DFA cleared
new_grapheme_utf8 average time 6898us over 500 runs of 6614 symbols from udhr_kor.txt
new_grapheme_utf8 average time 7033us over 500 runs of 6614 symbols from udhr_kor.txt DFA cleared
new_grapheme_utf8 average time 6606us over 500 runs of 13379 symbols from udhr_hin.txt
new_grapheme_utf8 average time 6651us over 500 runs of 13379 symbols from udhr_hin.txt DFA cleared
new_grapheme_utf8 average time 101us over 500 runs of 85 symbols from emoji.txt
new_grapheme_utf8 average time 113us over 500 runs of 85 symbols from emoji.txt DFA cleared
Warming up Java compiler...
load_legacy_java_ascii average time 52us over 3500 loads of 29038 symbols
load_legacy_java_utf8 average time 40us over 3500 loads of 29038 symbols
load_new_java_utf8 average time 192us over 3500 loads of 29038 symbols
lex_legacy_java_ascii average time 348us over 2000 runs of 29038 symbols
lex_legacy_java_ascii average time 880us over 2000 runs of 29038 symbols DFA cleared
lex_legacy_java_utf8 average time 348us over 2000 runs of 29038 symbols
lex_legacy_java_utf8 average time 890us over 2000 runs of 29038 symbols DFA cleared
lex_new_java_utf8 average time 386us over 2000 runs of 29038 symbols
lex_new_java_utf8 average time 910us over 2000 runs of 29038 symbols DFA cleared
lex_legacy_grapheme_utf8 average time 6903us over 400 runs of 6614 symbols from udhr_kor.txt
lex_legacy_grapheme_utf8 average time 7108us over 400 runs of 6614 symbols from udhr_kor.txt DFA cleared
lex_legacy_grapheme_utf8 average time 5980us over 400 runs of 13379 symbols from udhr_hin.txt
lex_legacy_grapheme_utf8 average time 6056us over 400 runs of 13379 symbols from udhr_hin.txt DFA cleared
lex_new_grapheme_utf8 average time 6966us over 400 runs of 6614 symbols from udhr_kor.txt
lex_new_grapheme_utf8 average time 7077us over 400 runs of 6614 symbols from udhr_kor.txt DFA cleared
lex_new_grapheme_utf8 average time 6072us over 400 runs of 13379 symbols from udhr_hin.txt
lex_new_grapheme_utf8 average time 6103us over 400 runs of 13379 symbols from udhr_hin.txt DFA cleared
lex_new_grapheme_utf8 average time 100us over 400 runs of 85 symbols from emoji.txt
lex_new_grapheme_utf8 average time 110us over 400 runs of 85 symbols from emoji.txt DFA cleared
*
* The "DFA cleared" indicates that the lexer was returned to initial conditions
* before the tokenizing of each file. As the ALL(*) lexer encounters new input,
* it records how it tokenized the chars. The next time it sees that input,
* it will more quickly recognize the token.
*
* Lexing times have the top 20% stripped off before doing the average
* to account for issues with the garbage collection and compilation pauses;
* other OS tasks could also pop in randomly.
*
* Load times are too fast to measure with a microsecond clock using an SSD
* so the average load time is computed as the overall time to load
* n times divided by n (rather than summing up the individual times).
*
* @since 4.7
*/
public class TimeLexerSpeed { // don't call it Test else it'll run during "mvn test"
/** Classpath resource path of the Java source file used as the Java lexing/loading sample. */
public static final String Parser_java_file = "Java/src/org/antlr/v4/runtime/Parser.java";
/** Classpath resource directory prepended to the grapheme sample file names. */
public static final String PerfDir = "org/antlr/v4/test/runtime/java/api/perf";
/** Checked by the {@code if ( output )} guards around result printing; compilerWarmUp() sets it to false while it runs. */
public boolean output = true;
/**
 * Runs the whole benchmark suite and prints one result line per measurement.
 *
 * Order of work: a silent compiler warm-up, the three load-time measurements
 * on the sample Java file, the Java lexing measurements (each with and
 * without clearing the lexer DFA cache), and finally the grapheme lexing
 * measurements on the sample text files.
 *
 * @param args ignored
 * @throws Exception if any of the benchmark methods fails
 */
public static void main(String[] args) throws Exception {
    TimeLexerSpeed tests = new TimeLexerSpeed();

    tests.compilerWarmUp(100);

    // load-time measurements
    int n = 3500;
    tests.load_legacy_java_ascii(n);
    tests.load_legacy_java_utf8(n);
    tests.load_new_java_utf8(n);
    System.out.println();

    // lexing the sample Java file
    n = 2000;
    tests.lex_legacy_java_ascii(n, false);
    tests.lex_legacy_java_ascii(n, true);
    tests.lex_legacy_java_utf8(n, false);
    tests.lex_legacy_java_utf8(n, true);
    tests.lex_new_java_utf8(n, false);
    tests.lex_new_java_utf8(n, true);
    System.out.println();

    // lexing the grapheme samples
    n = 400;
    tests.lex_legacy_grapheme_utf8("udhr_kor.txt", n, false);
    tests.lex_legacy_grapheme_utf8("udhr_kor.txt", n, true);
    tests.lex_legacy_grapheme_utf8("udhr_hin.txt", n, false);
    tests.lex_legacy_grapheme_utf8("udhr_hin.txt", n, true);
    // legacy can't handle the emoji (32 bit stuff)

    tests.lex_new_grapheme_utf8("udhr_kor.txt", n, false);
    tests.lex_new_grapheme_utf8("udhr_kor.txt", n, true);
    tests.lex_new_grapheme_utf8("udhr_hin.txt", n, false);
    tests.lex_new_grapheme_utf8("udhr_hin.txt", n, true);
    tests.lex_new_grapheme_utf8("emoji.txt", n, false);
    tests.lex_new_grapheme_utf8("emoji.txt", n, true);
}
public void legacy_java_ascii(int n, boolean clearLexerDFACache) throws Exception {
/**
 * Exercises the lexing benchmarks with the {@code output} flag switched off
 * so that later, reported runs are not the first executions of that code.
 * Prints a progress line ("Warming up Java compiler" followed by one dot per
 * completed step) directly to stdout.
 *
 * The {@code output} flag is put back to the value it had on entry even if
 * one of the warm-up runs throws, so a failure here cannot silently mute
 * every later report.
 *
 * @param n number of runs passed to each warm-up benchmark
 * @throws Exception if any warm-up benchmark fails
 */
public void compilerWarmUp(int n) throws Exception {
    System.out.print("Warming up Java compiler");
    boolean previousOutput = output;
    output = false;
    try {
        lex_new_java_utf8(n, false);
        System.out.print('.');
        lex_legacy_java_utf8(n, false);
        System.out.print('.');
        lex_legacy_grapheme_utf8("udhr_hin.txt", n, false);
        System.out.print('.');
        lex_new_grapheme_utf8("udhr_hin.txt", n, false);
        System.out.println();
    }
    finally {
        output = previousOutput;
    }
}
/**
 * Loads the sample Java file {@code n} times with the single-argument
 * {@code ANTLRFileStream} constructor (no explicit encoding) and prints the
 * mean load time in microseconds.
 *
 * The mean is the total elapsed time of the whole loop divided by {@code n};
 * individual loads are not timed separately.
 *
 * @param n number of loads to perform; must be positive
 * @throws IllegalArgumentException if {@code n} is not positive
 * @throws IllegalStateException if the sample file is not on the classpath
 * @throws Exception if the file cannot be located or read
 */
public void load_legacy_java_ascii(int n) throws Exception {
    if ( n<=0 ) {
        throw new IllegalArgumentException("n must be positive: "+n);
    }
    URL sampleJavaFile = TimeLexerSpeed.class.getClassLoader().getResource(Parser_java_file);
    if ( sampleJavaFile==null ) {
        throw new IllegalStateException("resource not found on classpath: "+Parser_java_file);
    }
    // URL.getFile() keeps percent-escapes (a space stays "%20") and yields
    // "/C:/..." on Windows; converting through a URI gives a usable path.
    // Done before the timer starts so it is not part of the measurement.
    String fileName = Paths.get(sampleJavaFile.toURI()).toString();

    long start = System.nanoTime();
    CharStream input = null;
    for (int i = 0; i<n; i++) {
        input = new ANTLRFileStream(fileName);
    }
    long stop = System.nanoTime();

    long tus = (stop-start)/1000; // total elapsed time in microseconds
    int size = input.size();
    // frame 0 of a freshly created exception is this method, so this yields its own name
    String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
    if ( output ) {
        System.out.printf("%25s average time %5dus over %4d loads of %5d symbols\n",
                          currentMethodName,
                          tus/n,
                          n,
                          size);
    }
}
/**
 * Loads the sample Java file {@code n} times with {@code ANTLRFileStream},
 * passing "UTF-8" as the encoding, and prints the mean load time in
 * microseconds.
 *
 * The mean is the total elapsed time of the whole loop divided by {@code n};
 * individual loads are not timed separately.
 *
 * @param n number of loads to perform; must be positive
 * @throws IllegalArgumentException if {@code n} is not positive
 * @throws IllegalStateException if the sample file is not on the classpath
 * @throws Exception if the file cannot be located or read
 */
public void load_legacy_java_utf8(int n) throws Exception {
    if ( n<=0 ) {
        throw new IllegalArgumentException("n must be positive: "+n);
    }
    URL sampleJavaFile = TimeLexerSpeed.class.getClassLoader().getResource(Parser_java_file);
    if ( sampleJavaFile==null ) {
        throw new IllegalStateException("resource not found on classpath: "+Parser_java_file);
    }
    // URL.getFile() keeps percent-escapes (a space stays "%20") and yields
    // "/C:/..." on Windows; converting through a URI gives a usable path.
    // Done before the timer starts so it is not part of the measurement.
    String fileName = Paths.get(sampleJavaFile.toURI()).toString();

    long start = System.nanoTime();
    CharStream input = null;
    for (int i = 0; i<n; i++) {
        input = new ANTLRFileStream(fileName, "UTF-8");
    }
    long stop = System.nanoTime();

    long tus = (stop-start)/1000; // total elapsed time in microseconds
    int size = input.size();
    // frame 0 of a freshly created exception is this method, so this yields its own name
    String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
    if ( output ) {
        System.out.printf("%25s average time %5dus over %4d loads of %5d symbols\n",
                          currentMethodName,
                          tus/n,
                          n,
                          size);
    }
}
/**
 * Loads the sample Java file {@code n} times with
 * {@code CharStreams.fromPath} and prints the mean load time in microseconds.
 *
 * The mean is the total elapsed time of the whole loop divided by {@code n};
 * individual loads are not timed separately. As before, building the
 * {@code Path} from the file name happens inside the timed loop.
 *
 * @param n number of loads to perform; must be positive
 * @throws IllegalArgumentException if {@code n} is not positive
 * @throws IllegalStateException if the sample file is not on the classpath
 * @throws Exception if the file cannot be located or read
 */
public void load_new_java_utf8(int n) throws Exception {
    if ( n<=0 ) {
        throw new IllegalArgumentException("n must be positive: "+n);
    }
    URL sampleJavaFile = TimeLexerSpeed.class.getClassLoader().getResource(Parser_java_file);
    if ( sampleJavaFile==null ) {
        throw new IllegalStateException("resource not found on classpath: "+Parser_java_file);
    }
    // URL.getFile() keeps percent-escapes (a space stays "%20") and yields
    // "/C:/..." on Windows; converting through a URI gives a usable path.
    // Done before the timer starts so it is not part of the measurement.
    String fileName = Paths.get(sampleJavaFile.toURI()).toString();

    long start = System.nanoTime();
    CharStream input = null;
    for (int i = 0; i<n; i++) {
        input = CharStreams.fromPath(Paths.get(fileName));
    }
    long stop = System.nanoTime();

    long tus = (stop-start)/1000; // total elapsed time in microseconds
    int size = input.size();
    // frame 0 of a freshly created exception is this method, so this yields its own name
    String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
    if ( output ) {
        System.out.printf("%25s average time %5dus over %4d loads of %5d symbols\n",
                          currentMethodName,
                          tus/n,
                          n,
                          size);
    }
}
/**
 * Lexes the sample Java file, loaded through the single-argument
 * {@code ANTLRFileStream} constructor (no explicit encoding), and prints the
 * value returned by {@code tokenize} as the average time in microseconds.
 *
 * The result line is printed once, and only while {@code output} is true.
 *
 * @param n number of runs, passed on to {@code tokenize}
 * @param clearLexerDFACache passed on to {@code tokenize}; also adds
 *        " DFA cleared" to the printed line when true
 * @throws Exception if the sample file cannot be located or read
 */
public void lex_legacy_java_ascii(int n, boolean clearLexerDFACache) throws Exception {
    URL sampleJavaFile = TimeLexerSpeed.class.getClassLoader().getResource(Parser_java_file);
    CharStream input = new ANTLRFileStream(sampleJavaFile.getFile());
    JavaLexer lexer = new JavaLexer(input);
    double avg = tokenize(lexer, n, clearLexerDFACache);
    // frame 0 of a freshly created exception is this method, so this yields its own name
    String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
    if ( output ) {
        System.out.printf("%25s average time %5dus over %4d runs of %5d symbols%s\n",
                          currentMethodName,
                          (int)avg,
                          n,
                          input.size(),
                          clearLexerDFACache ? " DFA cleared" : "");
    }
}
public void legacy_java_utf8(int n, boolean clearLexerDFACache) throws Exception {
/**
 * Lexes the sample Java file, loaded through {@code ANTLRFileStream} with
 * "UTF-8" as the encoding, and prints the value returned by {@code tokenize}
 * as the average time in microseconds.
 *
 * The result line is printed once, and only while {@code output} is true.
 *
 * @param n number of runs, passed on to {@code tokenize}
 * @param clearLexerDFACache passed on to {@code tokenize}; also adds
 *        " DFA cleared" to the printed line when true
 * @throws Exception if the sample file cannot be located or read
 */
public void lex_legacy_java_utf8(int n, boolean clearLexerDFACache) throws Exception {
    URL sampleJavaFile = TimeLexerSpeed.class.getClassLoader().getResource(Parser_java_file);
    CharStream input = new ANTLRFileStream(sampleJavaFile.getFile(), "UTF-8");
    JavaLexer lexer = new JavaLexer(input);
    double avg = tokenize(lexer, n, clearLexerDFACache);
    // frame 0 of a freshly created exception is this method, so this yields its own name
    String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
    if ( output ) {
        System.out.printf("%25s average time %5dus over %4d runs of %5d symbols%s\n",
                          currentMethodName,
                          (int)avg,
                          n,
                          input.size(),
                          clearLexerDFACache ? " DFA cleared" : "");
    }
}
public void new_java_utf8(int n, boolean clearLexerDFACache) throws Exception {
/**
 * Lexes the sample Java file, loaded through {@code CharStreams.fromPath},
 * and prints the value returned by {@code tokenize} as the average time in
 * microseconds.
 *
 * The result line is printed once, and only while {@code output} is true.
 *
 * @param n number of runs, passed on to {@code tokenize}
 * @param clearLexerDFACache passed on to {@code tokenize}; also adds
 *        " DFA cleared" to the printed line when true
 * @throws Exception if the sample file cannot be located or read
 */
public void lex_new_java_utf8(int n, boolean clearLexerDFACache) throws Exception {
    URL sampleJavaFile = TimeLexerSpeed.class.getClassLoader().getResource(Parser_java_file);
    CharStream input = CharStreams.fromPath(Paths.get(sampleJavaFile.getFile()));
    JavaLexer lexer = new JavaLexer(input);
    double avg = tokenize(lexer, n, clearLexerDFACache);
    // frame 0 of a freshly created exception is this method, so this yields its own name
    String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
    if ( output ) {
        System.out.printf("%25s average time %5dus over %4d runs of %5d symbols%s\n",
                          currentMethodName,
                          (int)avg,
                          n,
                          input.size(),
                          clearLexerDFACache ? " DFA cleared" : "");
    }
}
public void legacy_grapheme_utf8(String fileName, int n, boolean clearLexerDFACache) throws Exception {
/**
 * Lexes one grapheme sample file from {@code PerfDir} with
 * {@code graphemesLexer}, loading it through {@code ANTLRFileStream} with
 * "UTF-8" as the encoding, and prints the value returned by {@code tokenize}
 * as the average time in microseconds.
 *
 * The result line is printed once, and only while {@code output} is true.
 *
 * @param fileName name of the sample file inside {@code PerfDir}
 * @param n number of runs, passed on to {@code tokenize}
 * @param clearLexerDFACache passed on to {@code tokenize}; also adds
 *        " DFA cleared" to the printed line when true
 * @throws Exception if the sample file cannot be located or read
 */
public void lex_legacy_grapheme_utf8(String fileName, int n, boolean clearLexerDFACache) throws Exception {
    URL sampleFile = TimeLexerSpeed.class.getClassLoader().getResource(PerfDir+"/"+fileName);
    CharStream input = new ANTLRFileStream(sampleFile.getFile(), "UTF-8");
    graphemesLexer lexer = new graphemesLexer(input);
    double avg = tokenize(lexer, n, clearLexerDFACache);
    // frame 0 of a freshly created exception is this method, so this yields its own name
    String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
    if ( output ) {
        System.out.printf("%25s average time %5dus over %4d runs of %5d symbols from %s%s\n",
                          currentMethodName,
                          (int)avg,
                          n,
                          input.size(),
                          fileName,
                          clearLexerDFACache ? " DFA cleared" : "");
    }
}
public void new_grapheme_utf8(String fileName, int n, boolean clearLexerDFACache) throws Exception {
/**
 * Lexes one grapheme sample file from {@code PerfDir} with
 * {@code graphemesLexer}, loading it through {@code CharStreams.fromPath},
 * and prints the value returned by {@code tokenize} as the average time in
 * microseconds.
 *
 * The result line is printed once, and only while {@code output} is true.
 *
 * @param fileName name of the sample file inside {@code PerfDir}
 * @param n number of runs, passed on to {@code tokenize}
 * @param clearLexerDFACache passed on to {@code tokenize}; also adds
 *        " DFA cleared" to the printed line when true
 * @throws Exception if the sample file cannot be located or read
 */
public void lex_new_grapheme_utf8(String fileName, int n, boolean clearLexerDFACache) throws Exception {
    URL sampleFile = TimeLexerSpeed.class.getClassLoader().getResource(PerfDir+"/"+fileName);
    CharStream input = CharStreams.fromPath(Paths.get(sampleFile.getFile()));
    graphemesLexer lexer = new graphemesLexer(input);
    double avg = tokenize(lexer, n, clearLexerDFACache);
    // frame 0 of a freshly created exception is this method, so this yields its own name
    String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
    if ( output ) {
        System.out.printf("%25s average time %5dus over %4d runs of %5d symbols from %s%s\n",
                          currentMethodName,
                          (int)avg,
                          n,
                          input.size(),
                          fileName,
                          clearLexerDFACache ? " DFA cleared" : "");
    }
}
public double tokenize(Lexer lexer, int n, boolean clearLexerDFACache) {
@ -155,7 +250,7 @@ public class TimeLexerSpeed { // don't call it Test else it'll run during "mvn t
// int size = lexer.getInputStream().size();
long stop = System.nanoTime();
times[i] = (stop-start)/1000;
// System.out.printf("Tokenized %d char in %dus\n", size, times[i]);
// if ( output ) System.out.printf("Tokenized %d char in %dus\n", size, times[i]);
}
Arrays.sort(times);
times = Arrays.copyOfRange(times, 0, times.length-(int)(n*.2)); // drop highest 20% of times