add load times

2017-03-17 13:37:01 -07:00 · 2017-03-17 13:37:01 -07:00 · b961c86143
parent aa177cf415
commit b961c86143
1 changed files with 169 additions and 74 deletions
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/perf/TimeLexerSpeed.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/perf/TimeLexerSpeed.java
@ -13,133 +13,228 @@ import java.util.Arrays;
 import java.util.List;

 /** Test how fast we can lex Java and some unicode graphemes using old and
- *  new unicode stream mechanism.
+ *  new unicode stream mechanism. It also tests load time for ASCII
+ *  and unicode code points beyond 0xFFFF.
 *
- *  Sample output on OS X with 4 GHz Intel Core i7 (us == microseconds, 1/1000 of a millisecond)
+ *  Sample output on OS X with 4 GHz Intel Core i7 (us == microseconds, 1/1000 of a millisecond):
 *
-	 legacy_java_ascii average time   336us over 1500 runs of 29038 symbols
-	 legacy_java_ascii average time   828us over 1500 runs of 29038 symbols DFA cleared
-	  legacy_java_utf8 average time   281us over 1500 runs of 29038 symbols
-	  legacy_java_utf8 average time   815us over 1500 runs of 29038 symbols DFA cleared
-	     new_java_utf8 average time   538us over 1500 runs of 29038 symbols
-	     new_java_utf8 average time  1074us over 1500 runs of 29038 symbols DFA cleared
-  legacy_grapheme_utf8 average time  6812us over  500 runs of  6614 symbols from udhr_kor.txt
-  legacy_grapheme_utf8 average time  7046us over  500 runs of  6614 symbols from udhr_kor.txt DFA cleared
-  legacy_grapheme_utf8 average time  6190us over  500 runs of 13379 symbols from udhr_hin.txt
-  legacy_grapheme_utf8 average time  6379us over  500 runs of 13379 symbols from udhr_hin.txt DFA cleared
-	 new_grapheme_utf8 average time  6898us over  500 runs of  6614 symbols from udhr_kor.txt
-	 new_grapheme_utf8 average time  7033us over  500 runs of  6614 symbols from udhr_kor.txt DFA cleared
-	 new_grapheme_utf8 average time  6606us over  500 runs of 13379 symbols from udhr_hin.txt
-	 new_grapheme_utf8 average time  6651us over  500 runs of 13379 symbols from udhr_hin.txt DFA cleared
-	 new_grapheme_utf8 average time   101us over  500 runs of    85 symbols from emoji.txt
-	 new_grapheme_utf8 average time   113us over  500 runs of    85 symbols from emoji.txt DFA cleared
+ Warming up Java compiler...
+    load_legacy_java_ascii average time    52us over 3500 loads of 29038 symbols
+     load_legacy_java_utf8 average time    40us over 3500 loads of 29038 symbols
+        load_new_java_utf8 average time   192us over 3500 loads of 29038 symbols
+
+     lex_legacy_java_ascii average time   348us over 2000 runs of 29038 symbols
+     lex_legacy_java_ascii average time   880us over 2000 runs of 29038 symbols DFA cleared
+      lex_legacy_java_utf8 average time   348us over 2000 runs of 29038 symbols
+      lex_legacy_java_utf8 average time   890us over 2000 runs of 29038 symbols DFA cleared
+         lex_new_java_utf8 average time   386us over 2000 runs of 29038 symbols
+         lex_new_java_utf8 average time   910us over 2000 runs of 29038 symbols DFA cleared
+
+  lex_legacy_grapheme_utf8 average time  6903us over  400 runs of  6614 symbols from udhr_kor.txt
+  lex_legacy_grapheme_utf8 average time  7108us over  400 runs of  6614 symbols from udhr_kor.txt DFA cleared
+  lex_legacy_grapheme_utf8 average time  5980us over  400 runs of 13379 symbols from udhr_hin.txt
+  lex_legacy_grapheme_utf8 average time  6056us over  400 runs of 13379 symbols from udhr_hin.txt DFA cleared
+     lex_new_grapheme_utf8 average time  6966us over  400 runs of  6614 symbols from udhr_kor.txt
+     lex_new_grapheme_utf8 average time  7077us over  400 runs of  6614 symbols from udhr_kor.txt DFA cleared
+     lex_new_grapheme_utf8 average time  6072us over  400 runs of 13379 symbols from udhr_hin.txt
+     lex_new_grapheme_utf8 average time  6103us over  400 runs of 13379 symbols from udhr_hin.txt DFA cleared
+     lex_new_grapheme_utf8 average time   100us over  400 runs of    85 symbols from emoji.txt
+     lex_new_grapheme_utf8 average time   110us over  400 runs of    85 symbols from emoji.txt DFA cleared
 *
 *  The "DFA cleared" indicates that the lexer was returned to initial conditions
 *  before the tokenizing of each file.  As the ALL(*) lexer encounters new input,
 *  it records how it tokenized the chars. The next time it sees that input,
 *  it will more quickly recognize the token.
 *
+ *  Lexing times have the top 20% stripped off before doing the average
+ *  to account for issues with the garbage collection and compilation pauses;
+ *  other OS tasks could also pop in randomly.
+ *
+ *  Load times are too fast to measure with a microsecond clock using an SSD
+ *  so the average load time is computed as the overall time to load
+ *  n times divided by n (rather then summing up the individual times).
+ *
 *  @since 4.7
 */
 public class TimeLexerSpeed { // don't call it Test else it'll run during "mvn test"
 	public static final String Parser_java_file = "Java/src/org/antlr/v4/runtime/Parser.java";
 	public static final String PerfDir = "org/antlr/v4/test/runtime/java/api/perf";

+	public boolean output = true;
+
 	public static void main(String[] args) throws Exception {
 		TimeLexerSpeed tests = new TimeLexerSpeed();
-		int n = 1500;
-		tests.legacy_java_ascii(n, false);
-		tests.legacy_java_ascii(n, true);
-		tests.legacy_java_utf8(n, false);
-		tests.legacy_java_utf8(n, true);
-		tests.new_java_utf8(n, false);
-		tests.new_java_utf8(n, true);

-		n = 500;
-		tests.legacy_grapheme_utf8("udhr_kor.txt", n, false);
-		tests.legacy_grapheme_utf8("udhr_kor.txt", n, true);
-		tests.legacy_grapheme_utf8("udhr_hin.txt", n, false);
-		tests.legacy_grapheme_utf8("udhr_hin.txt", n, true);
+		tests.compilerWarmUp(100);
+
+		int n = 3500;
+		tests.load_legacy_java_ascii(n);
+		tests.load_legacy_java_utf8(n);
+		tests.load_new_java_utf8(n);
+		System.out.println();
+
+		n = 2000;
+		tests.lex_legacy_java_ascii(n, false);
+		tests.lex_legacy_java_ascii(n, true);
+		tests.lex_legacy_java_utf8(n, false);
+		tests.lex_legacy_java_utf8(n, true);
+		tests.lex_new_java_utf8(n, false);
+		tests.lex_new_java_utf8(n, true);
+		System.out.println();
+
+		n = 400;
+		tests.lex_legacy_grapheme_utf8("udhr_kor.txt", n, false);
+		tests.lex_legacy_grapheme_utf8("udhr_kor.txt", n, true);
+		tests.lex_legacy_grapheme_utf8("udhr_hin.txt", n, false);
+		tests.lex_legacy_grapheme_utf8("udhr_hin.txt", n, true);
 		// legacy can't handle the emoji (32 bit stuff)

-		tests.new_grapheme_utf8("udhr_kor.txt", n, false);
-		tests.new_grapheme_utf8("udhr_kor.txt", n, true);
-		tests.new_grapheme_utf8("udhr_hin.txt", n, false);
-		tests.new_grapheme_utf8("udhr_hin.txt", n, true);
-		tests.new_grapheme_utf8("emoji.txt", n, false);
-		tests.new_grapheme_utf8("emoji.txt", n, true);
+		tests.lex_new_grapheme_utf8("udhr_kor.txt", n, false);
+		tests.lex_new_grapheme_utf8("udhr_kor.txt", n, true);
+		tests.lex_new_grapheme_utf8("udhr_hin.txt", n, false);
+		tests.lex_new_grapheme_utf8("udhr_hin.txt", n, true);
+		tests.lex_new_grapheme_utf8("emoji.txt", n, false);
+		tests.lex_new_grapheme_utf8("emoji.txt", n, true);
 	}

-	public void legacy_java_ascii(int n, boolean clearLexerDFACache) throws Exception {
+	public void compilerWarmUp(int n) throws Exception {
+		System.out.print("Warming up Java compiler");
+		output = false;
+		lex_new_java_utf8(n, false);
+		System.out.print('.');
+		lex_legacy_java_utf8(n, false);
+		System.out.print('.');
+		lex_legacy_grapheme_utf8("udhr_hin.txt", n, false);
+		System.out.print('.');
+		lex_new_grapheme_utf8("udhr_hin.txt", n, false);
+		System.out.println();
+		output = true;
+	}
+
+	public void load_legacy_java_ascii(int n) throws Exception {
+		URL sampleJavaFile = TimeLexerSpeed.class.getClassLoader().getResource(Parser_java_file);
+		long start = System.nanoTime();
+		CharStream input = null;
+		for (int i = 0; i<n; i++) {
+			input = new ANTLRFileStream(sampleJavaFile.getFile());
+		}
+		long stop = System.nanoTime();
+		long tus = (stop-start)/1000;
+		int size = input.size();
+		String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
+		if ( output ) System.out.printf("%25s average time %5dus over %4d loads of %5d symbols\n",
+		                                currentMethodName,
+		                                tus/n,
+		                                n,
+		                                size);
+	}
+
+	public void load_legacy_java_utf8(int n) throws Exception {
+		URL sampleJavaFile = TimeLexerSpeed.class.getClassLoader().getResource(Parser_java_file);
+		long start = System.nanoTime();
+		CharStream input = null;
+		for (int i = 0; i<n; i++) {
+			input = new ANTLRFileStream(sampleJavaFile.getFile(), "UTF-8");
+		}
+		long stop = System.nanoTime();
+		long tus = (stop-start)/1000;
+		int size = input.size();
+		String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
+		if ( output ) System.out.printf("%25s average time %5dus over %4d loads of %5d symbols\n",
+		                                currentMethodName,
+		                                tus/n,
+		                                n,
+		                                size);
+	}
+
+	public void load_new_java_utf8(int n) throws Exception {
+		URL sampleJavaFile = TimeLexerSpeed.class.getClassLoader().getResource(Parser_java_file);
+		long start = System.nanoTime();
+		CharStream input = null;
+		for (int i = 0; i<n; i++) {
+			input = CharStreams.fromPath(Paths.get(sampleJavaFile.getFile()));
+		}
+		long stop = System.nanoTime();
+		long tus = (stop-start)/1000;
+		int size = input.size();
+		String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
+		if ( output ) System.out.printf("%25s average time %5dus over %4d loads of %5d symbols\n",
+		                                currentMethodName,
+		                                tus/n,
+		                                n,
+		                                size);
+	}
+
+	public void lex_legacy_java_ascii(int n, boolean clearLexerDFACache) throws Exception {
 		URL sampleJavaFile = TimeLexerSpeed.class.getClassLoader().getResource(Parser_java_file);
 		CharStream input = new ANTLRFileStream(sampleJavaFile.getFile());
 		JavaLexer lexer = new JavaLexer(input);
 		double avg = tokenize(lexer, n, clearLexerDFACache);
 		String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
-		System.out.printf("%25s average time %5dus over %4d runs of %5d symbols%s\n",
-		                  currentMethodName,
-		                  (int)avg,
-		                  n,
-		                  input.size(),
-		                  clearLexerDFACache ? " DFA cleared" : "");
+		if ( output ) System.out.printf("%25s average time %5dus over %4d runs of %5d symbols%s\n",
+		                                currentMethodName,
+		                                (int)avg,
+		                                n,
+		                                input.size(),
+		                                clearLexerDFACache ? " DFA cleared" : "");
 	}

-	public void legacy_java_utf8(int n, boolean clearLexerDFACache) throws Exception {
+	public void lex_legacy_java_utf8(int n, boolean clearLexerDFACache) throws Exception {
 		URL sampleJavaFile = TimeLexerSpeed.class.getClassLoader().getResource(Parser_java_file);
 		CharStream input = new ANTLRFileStream(sampleJavaFile.getFile(), "UTF-8");
 		JavaLexer lexer = new JavaLexer(input);
 		double avg = tokenize(lexer, n, clearLexerDFACache);
 		String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
-		System.out.printf("%25s average time %5dus over %4d runs of %5d symbols%s\n",
-		                  currentMethodName,
-		                  (int)avg,
-		                  n,
-		                  input.size(),
-		                  clearLexerDFACache ? " DFA cleared" : "");
+		if ( output ) System.out.printf("%25s average time %5dus over %4d runs of %5d symbols%s\n",
+		                                currentMethodName,
+		                                (int)avg,
+		                                n,
+		                                input.size(),
+		                                clearLexerDFACache ? " DFA cleared" : "");
 	}

-	public void new_java_utf8(int n, boolean clearLexerDFACache) throws Exception {
+	public void lex_new_java_utf8(int n, boolean clearLexerDFACache) throws Exception {
 		URL sampleJavaFile = TimeLexerSpeed.class.getClassLoader().getResource(Parser_java_file);
 		CharStream input = CharStreams.fromPath(Paths.get(sampleJavaFile.getFile()));
 		JavaLexer lexer = new JavaLexer(input);
 		double avg = tokenize(lexer, n, clearLexerDFACache);
 		String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
-		System.out.printf("%25s average time %5dus over %4d runs of %5d symbols%s\n",
-		                  currentMethodName,
-		                  (int)avg,
-		                  n,
-		                  input.size(),
-		                  clearLexerDFACache ? " DFA cleared" : "");
+		if ( output ) System.out.printf("%25s average time %5dus over %4d runs of %5d symbols%s\n",
+		                                currentMethodName,
+		                                (int)avg,
+		                                n,
+		                                input.size(),
+		                                clearLexerDFACache ? " DFA cleared" : "");
 	}

-	public void legacy_grapheme_utf8(String fileName, int n, boolean clearLexerDFACache) throws Exception {
+	public void lex_legacy_grapheme_utf8(String fileName, int n, boolean clearLexerDFACache) throws Exception {
 		URL sampleJavaFile = TimeLexerSpeed.class.getClassLoader().getResource(PerfDir+"/"+fileName);
 		CharStream input = new ANTLRFileStream(sampleJavaFile.getFile(), "UTF-8");
 		graphemesLexer lexer = new graphemesLexer(input);
 		double avg = tokenize(lexer, n, clearLexerDFACache);
 		String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
-		System.out.printf("%25s average time %5dus over %4d runs of %5d symbols from %s%s\n",
-		                  currentMethodName,
-		                  (int)avg,
-		                  n,
-		                  input.size(),
-		                  fileName,
-		                  clearLexerDFACache ? " DFA cleared" : "");
+		if ( output ) System.out.printf("%25s average time %5dus over %4d runs of %5d symbols from %s%s\n",
+		                                currentMethodName,
+		                                (int)avg,
+		                                n,
+		                                input.size(),
+		                                fileName,
+		                                clearLexerDFACache ? " DFA cleared" : "");
 	}

-	public void new_grapheme_utf8(String fileName, int n, boolean clearLexerDFACache) throws Exception {
+	public void lex_new_grapheme_utf8(String fileName, int n, boolean clearLexerDFACache) throws Exception {
 		URL sampleJavaFile = TimeLexerSpeed.class.getClassLoader().getResource(PerfDir+"/"+fileName);
 		CharStream input = CharStreams.fromPath(Paths.get(sampleJavaFile.getFile()));
 		graphemesLexer lexer = new graphemesLexer(input);
 		double avg = tokenize(lexer, n, clearLexerDFACache);
 		String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
-		System.out.printf("%25s average time %5dus over %4d runs of %5d symbols from %s%s\n",
-		                  currentMethodName,
-		                  (int)avg,
-		                  n,
-		                  input.size(),
-		                  fileName,
-		                  clearLexerDFACache ? " DFA cleared" : "");
+		if ( output ) System.out.printf("%25s average time %5dus over %4d runs of %5d symbols from %s%s\n",
+		                                currentMethodName,
+		                                (int)avg,
+		                                n,
+		                                input.size(),
+		                                fileName,
+		                                clearLexerDFACache ? " DFA cleared" : "");
 	}

 	public double tokenize(Lexer lexer, int n, boolean clearLexerDFACache) {
@ -155,7 +250,7 @@ public class TimeLexerSpeed { // don't call it Test else it'll run during "mvn t
 //			int size = lexer.getInputStream().size();
 			long stop = System.nanoTime();
 			times[i] = (stop-start)/1000;
-//			System.out.printf("Tokenized %d char in %dus\n", size, times[i]);
+//			if ( output ) System.out.printf("Tokenized %d char in %dus\n", size, times[i]);
 		}
 		Arrays.sort(times);
 		times = Arrays.copyOfRange(times, 0, times.length-(int)(n*.2)); // drop highest 20% of times